; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
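+; The VLX run enables the AVX512BW/VL/DQ extensions on top of AVX512F, so
+; 128- and 256-bit compares can write mask registers directly; the NoVLX run
+; has only AVX512F, so byte/word compares are sign-extended to 512 bits and
+; tested through zmm registers instead.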
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi3:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi4:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi5:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi6:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi7:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
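+; In the NoVLX lowering above, each bit of the compare mask is extracted with
+; a kshiftlw/kshiftrw pair plus kmovw, reassembled into a vector with vpinsrb,
+; and tested back into a mask with vptestmd before being stored to the stack
+; and reloaded as the i32 result.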
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi3:
+; NoVLX-NEXT: .Lcfi8:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi4:
+; NoVLX-NEXT: .Lcfi9:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi5:
+; NoVLX-NEXT: .Lcfi10:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi11:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi12:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi13:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi14:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi15:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi6:
+; NoVLX-NEXT: .Lcfi16:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi7:
+; NoVLX-NEXT: .Lcfi17:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi8:
+; NoVLX-NEXT: .Lcfi18:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi19:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi20:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi21:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi22:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi23:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi9:
+; NoVLX-NEXT: .Lcfi24:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi10:
+; NoVLX-NEXT: .Lcfi25:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi11:
+; NoVLX-NEXT: .Lcfi26:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi27:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi28:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi29:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi30:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi31:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi12:
+; NoVLX-NEXT: .Lcfi32:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi13:
+; NoVLX-NEXT: .Lcfi33:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi14:
+; NoVLX-NEXT: .Lcfi34:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi15:
+; NoVLX-NEXT: .Lcfi35:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi16:
+; NoVLX-NEXT: .Lcfi36:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi17:
+; NoVLX-NEXT: .Lcfi37:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi18:
+; NoVLX-NEXT: .Lcfi38:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi19:
+; NoVLX-NEXT: .Lcfi39:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi20:
+; NoVLX-NEXT: .Lcfi40:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi21:
+; NoVLX-NEXT: .Lcfi41:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi22:
+; NoVLX-NEXT: .Lcfi42:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi23:
+; NoVLX-NEXT: .Lcfi43:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi24:
+; NoVLX-NEXT: .Lcfi44:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi25:
+; NoVLX-NEXT: .Lcfi45:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi26:
+; NoVLX-NEXT: .Lcfi46:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi27:
+; NoVLX-NEXT: .Lcfi47:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi28:
+; NoVLX-NEXT: .Lcfi48:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi29:
+; NoVLX-NEXT: .Lcfi49:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi30:
+; NoVLX-NEXT: .Lcfi50:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi31:
+; NoVLX-NEXT: .Lcfi51:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi32:
+; NoVLX-NEXT: .Lcfi52:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi33:
+; NoVLX-NEXT: .Lcfi53:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi34:
+; NoVLX-NEXT: .Lcfi54:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi35:
+; NoVLX-NEXT: .Lcfi55:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi36:
+; NoVLX-NEXT: .Lcfi56:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi37:
+; NoVLX-NEXT: .Lcfi57:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi38:
+; NoVLX-NEXT: .Lcfi58:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi39:
+; NoVLX-NEXT: .Lcfi59:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi40:
+; NoVLX-NEXT: .Lcfi60:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi41:
+; NoVLX-NEXT: .Lcfi61:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi42:
+; NoVLX-NEXT: .Lcfi62:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi43:
+; NoVLX-NEXT: .Lcfi63:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi44:
+; NoVLX-NEXT: .Lcfi64:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi45:
+; NoVLX-NEXT: .Lcfi65:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi46:
+; NoVLX-NEXT: .Lcfi66:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi47:
+; NoVLX-NEXT: .Lcfi67:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi48:
+; NoVLX-NEXT: .Lcfi68:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi49:
+; NoVLX-NEXT: .Lcfi69:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi50:
+; NoVLX-NEXT: .Lcfi70:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi51:
+; NoVLX-NEXT: .Lcfi71:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi52:
+; NoVLX-NEXT: .Lcfi72:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi53:
+; NoVLX-NEXT: .Lcfi73:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi54:
+; NoVLX-NEXT: .Lcfi74:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi55:
+; NoVLX-NEXT: .Lcfi75:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi56:
+; NoVLX-NEXT: .Lcfi76:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi57:
+; NoVLX-NEXT: .Lcfi77:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi58:
+; NoVLX-NEXT: .Lcfi78:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi59:
+; NoVLX-NEXT: .Lcfi79:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi60:
+; NoVLX-NEXT: .Lcfi80:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi61:
+; NoVLX-NEXT: .Lcfi81:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi62:
+; NoVLX-NEXT: .Lcfi82:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi63:
+; NoVLX-NEXT: .Lcfi83:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi64:
+; NoVLX-NEXT: .Lcfi84:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi65:
+; NoVLX-NEXT: .Lcfi85:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi66:
+; NoVLX-NEXT: .Lcfi86:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi67:
+; NoVLX-NEXT: .Lcfi87:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi68:
+; NoVLX-NEXT: .Lcfi88:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi69:
+; NoVLX-NEXT: .Lcfi89:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi70:
+; NoVLX-NEXT: .Lcfi90:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi71:
+; NoVLX-NEXT: .Lcfi91:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi72:
+; NoVLX-NEXT: .Lcfi92:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi73:
+; NoVLX-NEXT: .Lcfi93:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi74:
+; NoVLX-NEXT: .Lcfi94:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi75:
+; NoVLX-NEXT: .Lcfi95:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi76:
+; NoVLX-NEXT: .Lcfi96:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi77:
+; NoVLX-NEXT: .Lcfi97:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi78:
+; NoVLX-NEXT: .Lcfi98:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi79:
+; NoVLX-NEXT: .Lcfi99:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi80:
+; NoVLX-NEXT: .Lcfi100:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi81:
+; NoVLX-NEXT: .Lcfi101:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi82:
+; NoVLX-NEXT: .Lcfi102:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi103:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi104:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi105:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi106:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi107:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi83:
+; NoVLX-NEXT: .Lcfi108:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi84:
+; NoVLX-NEXT: .Lcfi109:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi85:
+; NoVLX-NEXT: .Lcfi110:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi111:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi112:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi113:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi114:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi115:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi86:
+; NoVLX-NEXT: .Lcfi116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi87:
+; NoVLX-NEXT: .Lcfi117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi88:
+; NoVLX-NEXT: .Lcfi118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi119:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi120:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi121:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi122:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi123:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi89:
+; NoVLX-NEXT: .Lcfi124:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi90:
+; NoVLX-NEXT: .Lcfi125:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi91:
+; NoVLX-NEXT: .Lcfi126:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi127:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi128:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi129:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi130:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi131:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
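; The v64i1 widenings below zero the upper three 16-bit words of the 64-bit
; result slot up front (kxorw %k0, %k0, %k1 followed by three kmovw stores)
; before the recomputed 16-bit mask is spilled alongside them.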
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi92:
+; NoVLX-NEXT: .Lcfi132:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi93:
+; NoVLX-NEXT: .Lcfi133:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi94:
+; NoVLX-NEXT: .Lcfi134:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi95:
+; NoVLX-NEXT: .Lcfi135:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi96:
+; NoVLX-NEXT: .Lcfi136:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi97:
+; NoVLX-NEXT: .Lcfi137:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi98:
+; NoVLX-NEXT: .Lcfi138:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi99:
+; NoVLX-NEXT: .Lcfi139:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi100:
+; NoVLX-NEXT: .Lcfi140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi101:
+; NoVLX-NEXT: .Lcfi141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi102:
+; NoVLX-NEXT: .Lcfi142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi103:
+; NoVLX-NEXT: .Lcfi143:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi104:
+; NoVLX-NEXT: .Lcfi144:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi105:
+; NoVLX-NEXT: .Lcfi145:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi106:
+; NoVLX-NEXT: .Lcfi146:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi107:
+; NoVLX-NEXT: .Lcfi147:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
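; The masked forms apply the caller-supplied mask by moving %edi into %k1 and
; using it as a write mask on the test (vptestmd %zmm0, %zmm0, %k0 {%k1}).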
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi108:
+; NoVLX-NEXT: .Lcfi148:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi109:
+; NoVLX-NEXT: .Lcfi149:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi110:
+; NoVLX-NEXT: .Lcfi150:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi111:
+; NoVLX-NEXT: .Lcfi151:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi112:
+; NoVLX-NEXT: .Lcfi152:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi113:
+; NoVLX-NEXT: .Lcfi153:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi114:
+; NoVLX-NEXT: .Lcfi154:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi115:
+; NoVLX-NEXT: .Lcfi155:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi116:
+; NoVLX-NEXT: .Lcfi156:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi117:
+; NoVLX-NEXT: .Lcfi157:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi118:
+; NoVLX-NEXT: .Lcfi158:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi119:
+; NoVLX-NEXT: .Lcfi159:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi120:
+; NoVLX-NEXT: .Lcfi160:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi121:
+; NoVLX-NEXT: .Lcfi161:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi122:
+; NoVLX-NEXT: .Lcfi162:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi123:
+; NoVLX-NEXT: .Lcfi163:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
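; The v32i1 tests compare <32 x i16> vectors held in zmm registers; vpcmpeqw
; on 512-bit operands requires AVX512BW, so the avx512f-only run rebuilds
; each ymm half element by element (vmovq/vpextrq plus shrl/shrq feeding
; vpinsrw, then vinserti128) and issues two 256-bit vpcmpeqw compares whose
; 16-bit masks are combined through the stack.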
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi124:
+; NoVLX-NEXT: .Lcfi164:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi125:
+; NoVLX-NEXT: .Lcfi165:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi126:
+; NoVLX-NEXT: .Lcfi166:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm3, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi127:
+; NoVLX-NEXT: .Lcfi167:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi128:
+; NoVLX-NEXT: .Lcfi168:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi129:
+; NoVLX-NEXT: .Lcfi169:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
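; The masked v32i1 variants additionally materialize the two 16-bit mask
; halves as byte vectors (vpternlogd $255, ..., {%k1} {z} / {%k2} {z} then
; vpmovdb) and vpand them into the compare results before the final vptestmd
; of each half.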
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi130:
+; NoVLX-NEXT: .Lcfi170:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi131:
+; NoVLX-NEXT: .Lcfi171:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi132:
+; NoVLX-NEXT: .Lcfi172:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm6
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm7, %xmm7
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm6
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm4
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vpcmpeqw %ymm4, %ymm1, %ymm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi133:
+; NoVLX-NEXT: .Lcfi173:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi134:
+; NoVLX-NEXT: .Lcfi174:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi135:
+; NoVLX-NEXT: .Lcfi175:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm4
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi136:
+; NoVLX-NEXT: .Lcfi176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi137:
+; NoVLX-NEXT: .Lcfi177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi138:
+; NoVLX-NEXT: .Lcfi178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi139:
+; NoVLX-NEXT: .Lcfi179:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi140:
+; NoVLX-NEXT: .Lcfi180:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi141:
+; NoVLX-NEXT: .Lcfi181:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi142:
+; NoVLX-NEXT: .Lcfi182:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi143:
+; NoVLX-NEXT: .Lcfi183:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi144:
+; NoVLX-NEXT: .Lcfi184:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi145:
+; NoVLX-NEXT: .Lcfi185:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi146:
+; NoVLX-NEXT: .Lcfi186:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi147:
+; NoVLX-NEXT: .Lcfi187:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi148:
+; NoVLX-NEXT: .Lcfi188:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi149:
+; NoVLX-NEXT: .Lcfi189:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi150:
+; NoVLX-NEXT: .Lcfi190:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi151:
+; NoVLX-NEXT: .Lcfi191:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi152:
+; NoVLX-NEXT: .Lcfi192:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi153:
+; NoVLX-NEXT: .Lcfi193:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi154:
+; NoVLX-NEXT: .Lcfi194:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi155:
+; NoVLX-NEXT: .Lcfi195:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi156:
+; NoVLX-NEXT: .Lcfi196:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi157:
+; NoVLX-NEXT: .Lcfi197:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi158:
+; NoVLX-NEXT: .Lcfi198:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi159:
+; NoVLX-NEXT: .Lcfi199:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi160:
+; NoVLX-NEXT: .Lcfi200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi161:
+; NoVLX-NEXT: .Lcfi201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi162:
+; NoVLX-NEXT: .Lcfi202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi163:
+; NoVLX-NEXT: .Lcfi203:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi164:
+; NoVLX-NEXT: .Lcfi204:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi165:
+; NoVLX-NEXT: .Lcfi205:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi166:
+; NoVLX-NEXT: .Lcfi206:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi167:
+; NoVLX-NEXT: .Lcfi207:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi168:
+; NoVLX-NEXT: .Lcfi208:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi169:
+; NoVLX-NEXT: .Lcfi209:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi170:
+; NoVLX-NEXT: .Lcfi210:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi171:
+; NoVLX-NEXT: .Lcfi211:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi172:
+; NoVLX-NEXT: .Lcfi212:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi173:
+; NoVLX-NEXT: .Lcfi213:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi174:
+; NoVLX-NEXT: .Lcfi214:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi175:
+; NoVLX-NEXT: .Lcfi215:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi176:
+; NoVLX-NEXT: .Lcfi216:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi177:
+; NoVLX-NEXT: .Lcfi217:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi178:
+; NoVLX-NEXT: .Lcfi218:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi179:
+; NoVLX-NEXT: .Lcfi219:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi180:
+; NoVLX-NEXT: .Lcfi220:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi181:
+; NoVLX-NEXT: .Lcfi221:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi182:
+; NoVLX-NEXT: .Lcfi222:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi183:
+; NoVLX-NEXT: .Lcfi223:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi184:
+; NoVLX-NEXT: .Lcfi224:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi185:
+; NoVLX-NEXT: .Lcfi225:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi186:
+; NoVLX-NEXT: .Lcfi226:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi187:
+; NoVLX-NEXT: .Lcfi227:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi188:
+; NoVLX-NEXT: .Lcfi228:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi189:
+; NoVLX-NEXT: .Lcfi229:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi190:
+; NoVLX-NEXT: .Lcfi230:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi191:
+; NoVLX-NEXT: .Lcfi231:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi192:
+; NoVLX-NEXT: .Lcfi232:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi193:
+; NoVLX-NEXT: .Lcfi233:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi194:
+; NoVLX-NEXT: .Lcfi234:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi195:
+; NoVLX-NEXT: .Lcfi235:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi196:
+; NoVLX-NEXT: .Lcfi236:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi197:
+; NoVLX-NEXT: .Lcfi237:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi198:
+; NoVLX-NEXT: .Lcfi238:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi199:
+; NoVLX-NEXT: .Lcfi239:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi200:
+; NoVLX-NEXT: .Lcfi240:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi201:
+; NoVLX-NEXT: .Lcfi241:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi202:
+; NoVLX-NEXT: .Lcfi242:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi203:
+; NoVLX-NEXT: .Lcfi243:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi204:
+; NoVLX-NEXT: .Lcfi244:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi205:
+; NoVLX-NEXT: .Lcfi245:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi206:
+; NoVLX-NEXT: .Lcfi246:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi207:
+; NoVLX-NEXT: .Lcfi247:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
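; Note: the v16i1 tests below keep all sixteen extracted mask bits live at
; once, so the NoVLX code saves rbx and r12-r15 (with matching .cfi_offset
; records) and restores the stack with leaq -40(%rbp), %rsp before popping
; them.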
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi208:
+; NoVLX-NEXT: .Lcfi248:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi209:
+; NoVLX-NEXT: .Lcfi249:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi210:
+; NoVLX-NEXT: .Lcfi250:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi251:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi252:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi253:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi254:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi255:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi211:
+; NoVLX-NEXT: .Lcfi256:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi212:
+; NoVLX-NEXT: .Lcfi257:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi213:
+; NoVLX-NEXT: .Lcfi258:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi259:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi260:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi261:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi262:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi263:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi214:
+; NoVLX-NEXT: .Lcfi264:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi215:
+; NoVLX-NEXT: .Lcfi265:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi216:
+; NoVLX-NEXT: .Lcfi266:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi267:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi268:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi269:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi270:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi271:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi217:
+; NoVLX-NEXT: .Lcfi272:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi218:
+; NoVLX-NEXT: .Lcfi273:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi219:
+; NoVLX-NEXT: .Lcfi274:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi275:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi276:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi277:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi278:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi279:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <8 x i64> %__a to <16 x i32>
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
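; Note: the kshiftlw $N, %k0, %k1 / kshiftrw $15, %k1, %k1 pairs above and
; below isolate mask bit 15-N in bit 0 of %k1 so that kmovw can copy it to a
; general-purpose register.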
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi220:
+; NoVLX-NEXT: .Lcfi280:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi221:
+; NoVLX-NEXT: .Lcfi281:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi222:
+; NoVLX-NEXT: .Lcfi282:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi283:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi284:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi285:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi286:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi287:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi223:
+; NoVLX-NEXT: .Lcfi288:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi224:
+; NoVLX-NEXT: .Lcfi289:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi225:
+; NoVLX-NEXT: .Lcfi290:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi291:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi292:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi293:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi294:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi295:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi226:
+; NoVLX-NEXT: .Lcfi296:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi227:
+; NoVLX-NEXT: .Lcfi297:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi228:
+; NoVLX-NEXT: .Lcfi298:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi229:
+; NoVLX-NEXT: .Lcfi299:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi230:
+; NoVLX-NEXT: .Lcfi300:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi231:
+; NoVLX-NEXT: .Lcfi301:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi232:
+; NoVLX-NEXT: .Lcfi302:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi233:
+; NoVLX-NEXT: .Lcfi303:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi234:
+; NoVLX-NEXT: .Lcfi304:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi235:
+; NoVLX-NEXT: .Lcfi305:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi236:
+; NoVLX-NEXT: .Lcfi306:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi237:
+; NoVLX-NEXT: .Lcfi307:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi238:
+; NoVLX-NEXT: .Lcfi308:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi239:
+; NoVLX-NEXT: .Lcfi309:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi240:
+; NoVLX-NEXT: .Lcfi310:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi241:
+; NoVLX-NEXT: .Lcfi311:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi242:
+; NoVLX-NEXT: .Lcfi312:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi243:
+; NoVLX-NEXT: .Lcfi313:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi244:
+; NoVLX-NEXT: .Lcfi314:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi245:
+; NoVLX-NEXT: .Lcfi315:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi246:
+; NoVLX-NEXT: .Lcfi316:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi247:
+; NoVLX-NEXT: .Lcfi317:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi248:
+; NoVLX-NEXT: .Lcfi318:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi249:
+; NoVLX-NEXT: .Lcfi319:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi250:
+; NoVLX-NEXT: .Lcfi320:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi251:
+; NoVLX-NEXT: .Lcfi321:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi252:
+; NoVLX-NEXT: .Lcfi322:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi253:
+; NoVLX-NEXT: .Lcfi323:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi254:
+; NoVLX-NEXT: .Lcfi324:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi255:
+; NoVLX-NEXT: .Lcfi325:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi256:
+; NoVLX-NEXT: .Lcfi326:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi257:
+; NoVLX-NEXT: .Lcfi327:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi258:
+; NoVLX-NEXT: .Lcfi328:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi259:
+; NoVLX-NEXT: .Lcfi329:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi260:
+; NoVLX-NEXT: .Lcfi330:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi261:
+; NoVLX-NEXT: .Lcfi331:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi262:
+; NoVLX-NEXT: .Lcfi332:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi263:
+; NoVLX-NEXT: .Lcfi333:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi264:
+; NoVLX-NEXT: .Lcfi334:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi265:
+; NoVLX-NEXT: .Lcfi335:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi266:
+; NoVLX-NEXT: .Lcfi336:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi267:
+; NoVLX-NEXT: .Lcfi337:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi268:
+; NoVLX-NEXT: .Lcfi338:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi269:
+; NoVLX-NEXT: .Lcfi339:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi270:
+; NoVLX-NEXT: .Lcfi340:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi271:
+; NoVLX-NEXT: .Lcfi341:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi272:
+; NoVLX-NEXT: .Lcfi342:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi273:
+; NoVLX-NEXT: .Lcfi343:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
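; Note: these NoVLX sequences widen their operations to zmm registers, so a
; vzeroupper is emitted before each retq to avoid AVX-to-SSE transition
; penalties in the caller.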
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
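; Note: in the masked v2i1 tests, the vector compare runs first and the
; scalar mask from %edi is expanded through k-register shifts into an xmm
; value; building that value in %xmm1 is safe because the compare has already
; consumed its second operand.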
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi274:
+; NoVLX-NEXT: .Lcfi344:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi275:
+; NoVLX-NEXT: .Lcfi345:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi276:
+; NoVLX-NEXT: .Lcfi346:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi277:
+; NoVLX-NEXT: .Lcfi347:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi278:
+; NoVLX-NEXT: .Lcfi348:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi279:
+; NoVLX-NEXT: .Lcfi349:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi280:
+; NoVLX-NEXT: .Lcfi350:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi281:
+; NoVLX-NEXT: .Lcfi351:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi282:
+; NoVLX-NEXT: .Lcfi352:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi283:
+; NoVLX-NEXT: .Lcfi353:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi284:
+; NoVLX-NEXT: .Lcfi354:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi285:
+; NoVLX-NEXT: .Lcfi355:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi286:
+; NoVLX-NEXT: .Lcfi356:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi287:
+; NoVLX-NEXT: .Lcfi357:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi288:
+; NoVLX-NEXT: .Lcfi358:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi289:
+; NoVLX-NEXT: .Lcfi359:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi290:
+; NoVLX-NEXT: .Lcfi360:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi291:
+; NoVLX-NEXT: .Lcfi361:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi292:
+; NoVLX-NEXT: .Lcfi362:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi293:
+; NoVLX-NEXT: .Lcfi363:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi294:
+; NoVLX-NEXT: .Lcfi364:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi295:
+; NoVLX-NEXT: .Lcfi365:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi296:
+; NoVLX-NEXT: .Lcfi366:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi297:
+; NoVLX-NEXT: .Lcfi367:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi298:
+; NoVLX-NEXT: .Lcfi368:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi299:
+; NoVLX-NEXT: .Lcfi369:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi300:
+; NoVLX-NEXT: .Lcfi370:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi301:
+; NoVLX-NEXT: .Lcfi371:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi302:
+; NoVLX-NEXT: .Lcfi372:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi303:
+; NoVLX-NEXT: .Lcfi373:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi304:
+; NoVLX-NEXT: .Lcfi374:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi305:
+; NoVLX-NEXT: .Lcfi375:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi306:
+; NoVLX-NEXT: .Lcfi376:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi307:
+; NoVLX-NEXT: .Lcfi377:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi308:
+; NoVLX-NEXT: .Lcfi378:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi309:
+; NoVLX-NEXT: .Lcfi379:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi310:
+; NoVLX-NEXT: .Lcfi380:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi311:
+; NoVLX-NEXT: .Lcfi381:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi312:
+; NoVLX-NEXT: .Lcfi382:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi313:
+; NoVLX-NEXT: .Lcfi383:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi314:
+; NoVLX-NEXT: .Lcfi384:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi315:
+; NoVLX-NEXT: .Lcfi385:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi316:
+; NoVLX-NEXT: .Lcfi386:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi317:
+; NoVLX-NEXT: .Lcfi387:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi318:
+; NoVLX-NEXT: .Lcfi388:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi319:
+; NoVLX-NEXT: .Lcfi389:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi320:
+; NoVLX-NEXT: .Lcfi390:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi321:
+; NoVLX-NEXT: .Lcfi391:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi322:
+; NoVLX-NEXT: .Lcfi392:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi323:
+; NoVLX-NEXT: .Lcfi393:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi324:
+; NoVLX-NEXT: .Lcfi394:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi325:
+; NoVLX-NEXT: .Lcfi395:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi326:
+; NoVLX-NEXT: .Lcfi396:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi327:
+; NoVLX-NEXT: .Lcfi397:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi328:
+; NoVLX-NEXT: .Lcfi398:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi329:
+; NoVLX-NEXT: .Lcfi399:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi330:
+; NoVLX-NEXT: .Lcfi400:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi331:
+; NoVLX-NEXT: .Lcfi401:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi332:
+; NoVLX-NEXT: .Lcfi402:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi333:
+; NoVLX-NEXT: .Lcfi403:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi334:
+; NoVLX-NEXT: .Lcfi404:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi335:
+; NoVLX-NEXT: .Lcfi405:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi336:
+; NoVLX-NEXT: .Lcfi406:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi337:
+; NoVLX-NEXT: .Lcfi407:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi338:
+; NoVLX-NEXT: .Lcfi408:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi339:
+; NoVLX-NEXT: .Lcfi409:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi340:
+; NoVLX-NEXT: .Lcfi410:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi341:
+; NoVLX-NEXT: .Lcfi411:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi342:
+; NoVLX-NEXT: .Lcfi412:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi343:
+; NoVLX-NEXT: .Lcfi413:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi344:
+; NoVLX-NEXT: .Lcfi414:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi345:
+; NoVLX-NEXT: .Lcfi415:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi346:
+; NoVLX-NEXT: .Lcfi416:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi347:
+; NoVLX-NEXT: .Lcfi417:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi348:
+; NoVLX-NEXT: .Lcfi418:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi349:
+; NoVLX-NEXT: .Lcfi419:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi350:
+; NoVLX-NEXT: .Lcfi420:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi351:
+; NoVLX-NEXT: .Lcfi421:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi352:
+; NoVLX-NEXT: .Lcfi422:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi353:
+; NoVLX-NEXT: .Lcfi423:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi354:
+; NoVLX-NEXT: .Lcfi424:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi355:
+; NoVLX-NEXT: .Lcfi425:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi356:
+; NoVLX-NEXT: .Lcfi426:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi357:
+; NoVLX-NEXT: .Lcfi427:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi358:
+; NoVLX-NEXT: .Lcfi428:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi359:
+; NoVLX-NEXT: .Lcfi429:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi360:
+; NoVLX-NEXT: .Lcfi430:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi361:
+; NoVLX-NEXT: .Lcfi431:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi362:
+; NoVLX-NEXT: .Lcfi432:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi363:
+; NoVLX-NEXT: .Lcfi433:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi364:
+; NoVLX-NEXT: .Lcfi434:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi365:
+; NoVLX-NEXT: .Lcfi435:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi366:
+; NoVLX-NEXT: .Lcfi436:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi367:
+; NoVLX-NEXT: .Lcfi437:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi368:
+; NoVLX-NEXT: .Lcfi438:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi369:
+; NoVLX-NEXT: .Lcfi439:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi370:
+; NoVLX-NEXT: .Lcfi440:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi371:
+; NoVLX-NEXT: .Lcfi441:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi372:
+; NoVLX-NEXT: .Lcfi442:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi373:
+; NoVLX-NEXT: .Lcfi443:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi374:
+; NoVLX-NEXT: .Lcfi444:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi375:
+; NoVLX-NEXT: .Lcfi445:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi376:
+; NoVLX-NEXT: .Lcfi446:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi377:
+; NoVLX-NEXT: .Lcfi447:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi378:
+; NoVLX-NEXT: .Lcfi448:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi379:
+; NoVLX-NEXT: .Lcfi449:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi380:
+; NoVLX-NEXT: .Lcfi450:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi381:
+; NoVLX-NEXT: .Lcfi451:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi382:
+; NoVLX-NEXT: .Lcfi452:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi383:
+; NoVLX-NEXT: .Lcfi453:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi384:
+; NoVLX-NEXT: .Lcfi454:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi455:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi456:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi457:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi458:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi459:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
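; For the 16-bit masks in this group, the NoVLX path extracts all sixteen
; bits into GPRs before starting the vpinsrb chain. That needs more scratch
; registers than the volatile set provides, so %rbx and %r12-%r15 are saved
; in the prologue and the epilogue restores %rsp with leaq -40(%rbp), %rsp
; before popping them.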
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi385:
+; NoVLX-NEXT: .Lcfi460:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi386:
+; NoVLX-NEXT: .Lcfi461:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi387:
+; NoVLX-NEXT: .Lcfi462:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi463:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi464:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi465:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi466:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi467:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi388:
+; NoVLX-NEXT: .Lcfi468:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi389:
+; NoVLX-NEXT: .Lcfi469:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi390:
+; NoVLX-NEXT: .Lcfi470:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi471:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi472:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi473:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi474:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi475:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi391:
+; NoVLX-NEXT: .Lcfi476:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi392:
+; NoVLX-NEXT: .Lcfi477:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi393:
+; NoVLX-NEXT: .Lcfi478:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi479:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi480:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi481:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi482:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi483:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi394:
+; NoVLX-NEXT: .Lcfi484:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi395:
+; NoVLX-NEXT: .Lcfi485:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi396:
+; NoVLX-NEXT: .Lcfi486:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi397:
+; NoVLX-NEXT: .Lcfi487:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi398:
+; NoVLX-NEXT: .Lcfi488:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi399:
+; NoVLX-NEXT: .Lcfi489:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi400:
+; NoVLX-NEXT: .Lcfi490:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi401:
+; NoVLX-NEXT: .Lcfi491:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi402:
+; NoVLX-NEXT: .Lcfi492:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi403:
+; NoVLX-NEXT: .Lcfi493:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi404:
+; NoVLX-NEXT: .Lcfi494:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi405:
+; NoVLX-NEXT: .Lcfi495:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi406:
+; NoVLX-NEXT: .Lcfi496:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi407:
+; NoVLX-NEXT: .Lcfi497:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi408:
+; NoVLX-NEXT: .Lcfi498:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi409:
+; NoVLX-NEXT: .Lcfi499:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi410:
+; NoVLX-NEXT: .Lcfi500:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi411:
+; NoVLX-NEXT: .Lcfi501:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi412:
+; NoVLX-NEXT: .Lcfi502:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi413:
+; NoVLX-NEXT: .Lcfi503:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi414:
+; NoVLX-NEXT: .Lcfi504:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi415:
+; NoVLX-NEXT: .Lcfi505:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi416:
+; NoVLX-NEXT: .Lcfi506:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi417:
+; NoVLX-NEXT: .Lcfi507:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi418:
+; NoVLX-NEXT: .Lcfi508:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi419:
+; NoVLX-NEXT: .Lcfi509:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi420:
+; NoVLX-NEXT: .Lcfi510:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi421:
+; NoVLX-NEXT: .Lcfi511:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi422:
+; NoVLX-NEXT: .Lcfi512:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi423:
+; NoVLX-NEXT: .Lcfi513:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi424:
+; NoVLX-NEXT: .Lcfi514:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi425:
+; NoVLX-NEXT: .Lcfi515:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi426:
+; NoVLX-NEXT: .Lcfi516:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi427:
+; NoVLX-NEXT: .Lcfi517:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi428:
+; NoVLX-NEXT: .Lcfi518:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
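; When the result widens to 64 bits, the mask words are staged through a
; 32-byte-aligned stack slot (andq $-32, %rsp; subq $64, %rsp) and the final
; i64 is reloaded from it; the orq %rcx, %rax above merges the reloaded
; pieces into the returned value.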
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi429:
+; NoVLX-NEXT: .Lcfi519:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi430:
+; NoVLX-NEXT: .Lcfi520:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi431:
+; NoVLX-NEXT: .Lcfi521:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi432:
+; NoVLX-NEXT: .Lcfi522:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi433:
+; NoVLX-NEXT: .Lcfi523:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi434:
+; NoVLX-NEXT: .Lcfi524:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi435:
+; NoVLX-NEXT: .Lcfi525:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi436:
+; NoVLX-NEXT: .Lcfi526:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi437:
+; NoVLX-NEXT: .Lcfi527:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
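; Every NoVLX sequence here executes ZMM (or wide YMM) instructions, so each
; one ends with vzeroupper before retq; clearing the upper vector state
; avoids AVX-to-SSE transition penalties in legacy-SSE callers.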
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi438:
+; NoVLX-NEXT: .Lcfi528:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi439:
+; NoVLX-NEXT: .Lcfi529:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi440:
+; NoVLX-NEXT: .Lcfi530:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi441:
+; NoVLX-NEXT: .Lcfi531:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi442:
+; NoVLX-NEXT: .Lcfi532:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi443:
+; NoVLX-NEXT: .Lcfi533:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi444:
+; NoVLX-NEXT: .Lcfi534:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi445:
+; NoVLX-NEXT: .Lcfi535:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi446:
+; NoVLX-NEXT: .Lcfi536:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi447:
+; NoVLX-NEXT: .Lcfi537:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi448:
+; NoVLX-NEXT: .Lcfi538:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi449:
+; NoVLX-NEXT: .Lcfi539:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi450:
+; NoVLX-NEXT: .Lcfi540:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi451:
+; NoVLX-NEXT: .Lcfi541:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi452:
+; NoVLX-NEXT: .Lcfi542:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi453:
+; NoVLX-NEXT: .Lcfi543:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi454:
+; NoVLX-NEXT: .Lcfi544:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi455:
+; NoVLX-NEXT: .Lcfi545:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi456:
+; NoVLX-NEXT: .Lcfi546:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi457:
+; NoVLX-NEXT: .Lcfi547:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi458:
+; NoVLX-NEXT: .Lcfi548:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi459:
+; NoVLX-NEXT: .Lcfi549:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi460:
+; NoVLX-NEXT: .Lcfi550:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi461:
+; NoVLX-NEXT: .Lcfi551:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi462:
+; NoVLX-NEXT: .Lcfi552:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi463:
+; NoVLX-NEXT: .Lcfi553:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi464:
+; NoVLX-NEXT: .Lcfi554:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi555:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi556:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi557:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi558:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi559:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
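; The _mask_mem variant folds the second operand into the compare as a
; memory operand (vpcmpgtw (%rdi), %ymm0) but is otherwise identical.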
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi465:
+; NoVLX-NEXT: .Lcfi560:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi466:
+; NoVLX-NEXT: .Lcfi561:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi467:
+; NoVLX-NEXT: .Lcfi562:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi563:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi564:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi565:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi566:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi567:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi468:
+; NoVLX-NEXT: .Lcfi568:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi469:
+; NoVLX-NEXT: .Lcfi569:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi470:
+; NoVLX-NEXT: .Lcfi570:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi571:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi572:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi573:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi574:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi575:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi471:
+; NoVLX-NEXT: .Lcfi576:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi472:
+; NoVLX-NEXT: .Lcfi577:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi473:
+; NoVLX-NEXT: .Lcfi578:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi579:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi580:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi581:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi582:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi583:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
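; For the v64i1 widenings, the result words the compare leaves undefined are
; cleared explicitly: a kxorw-zeroed mask is stored to the spill slots with
; kmovw before the live 16 bits are written.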
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi474:
+; NoVLX-NEXT: .Lcfi584:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi475:
+; NoVLX-NEXT: .Lcfi585:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi476:
+; NoVLX-NEXT: .Lcfi586:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi477:
+; NoVLX-NEXT: .Lcfi587:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi478:
+; NoVLX-NEXT: .Lcfi588:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi479:
+; NoVLX-NEXT: .Lcfi589:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi480:
+; NoVLX-NEXT: .Lcfi590:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi481:
+; NoVLX-NEXT: .Lcfi591:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi482:
+; NoVLX-NEXT: .Lcfi592:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi483:
+; NoVLX-NEXT: .Lcfi593:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi484:
+; NoVLX-NEXT: .Lcfi594:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi485:
+; NoVLX-NEXT: .Lcfi595:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi486:
+; NoVLX-NEXT: .Lcfi596:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi487:
+; NoVLX-NEXT: .Lcfi597:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi488:
+; NoVLX-NEXT: .Lcfi598:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi489:
+; NoVLX-NEXT: .Lcfi599:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
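; The masked variants apply the caller-supplied mask up front (kmovw %edi,
; %k1 feeding vptestmd ... {%k1}), so the per-bit extraction that follows
; already operates on the masked result.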
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi490:
+; NoVLX-NEXT: .Lcfi600:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi491:
+; NoVLX-NEXT: .Lcfi601:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi492:
+; NoVLX-NEXT: .Lcfi602:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi493:
+; NoVLX-NEXT: .Lcfi603:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi494:
+; NoVLX-NEXT: .Lcfi604:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi495:
+; NoVLX-NEXT: .Lcfi605:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi496:
+; NoVLX-NEXT: .Lcfi606:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi497:
+; NoVLX-NEXT: .Lcfi607:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi498:
+; NoVLX-NEXT: .Lcfi608:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi499:
+; NoVLX-NEXT: .Lcfi609:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi500:
+; NoVLX-NEXT: .Lcfi610:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi501:
+; NoVLX-NEXT: .Lcfi611:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi502:
+; NoVLX-NEXT: .Lcfi612:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi503:
+; NoVLX-NEXT: .Lcfi613:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi504:
+; NoVLX-NEXT: .Lcfi614:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi505:
+; NoVLX-NEXT: .Lcfi615:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
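; The v32i1 tests compare <32 x i16>, more than one ymm compare covers, so
; each 512-bit input is split with vextracti128/vextracti32x4, reassembled
; halfword-by-halfword via vpextrq/shr/vpinsrw into ymm pairs, and compared
; with two separate vpcmpgtw instructions.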
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi506:
+; NoVLX-NEXT: .Lcfi616:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi507:
+; NoVLX-NEXT: .Lcfi617:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi508:
+; NoVLX-NEXT: .Lcfi618:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm3, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi509:
+; NoVLX-NEXT: .Lcfi619:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi510:
+; NoVLX-NEXT: .Lcfi620:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi511:
+; NoVLX-NEXT: .Lcfi621:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
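; In the masked v32i1 form, each 16-bit half of the incoming mask is
; materialized as a byte vector with vpternlogd $255, ..., {%k1} {z} plus
; vpmovdb, presumably so the mask can be applied to the compare results in
; vector form.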
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi512:
+; NoVLX-NEXT: .Lcfi622:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi513:
+; NoVLX-NEXT: .Lcfi623:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi514:
+; NoVLX-NEXT: .Lcfi624:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm6
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm7, %xmm7
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm6
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm4
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm1, %ymm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi515:
+; NoVLX-NEXT: .Lcfi625:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi516:
+; NoVLX-NEXT: .Lcfi626:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi517:
+; NoVLX-NEXT: .Lcfi627:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm4
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi518:
+; NoVLX-NEXT: .Lcfi628:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi519:
+; NoVLX-NEXT: .Lcfi629:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi520:
+; NoVLX-NEXT: .Lcfi630:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi521:
+; NoVLX-NEXT: .Lcfi631:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi522:
+; NoVLX-NEXT: .Lcfi632:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi523:
+; NoVLX-NEXT: .Lcfi633:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi524:
+; NoVLX-NEXT: .Lcfi634:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi525:
+; NoVLX-NEXT: .Lcfi635:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi526:
+; NoVLX-NEXT: .Lcfi636:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi527:
+; NoVLX-NEXT: .Lcfi637:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi528:
+; NoVLX-NEXT: .Lcfi638:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi529:
+; NoVLX-NEXT: .Lcfi639:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi530:
+; NoVLX-NEXT: .Lcfi640:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi531:
+; NoVLX-NEXT: .Lcfi641:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi532:
+; NoVLX-NEXT: .Lcfi642:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi533:
+; NoVLX-NEXT: .Lcfi643:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi534:
+; NoVLX-NEXT: .Lcfi644:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi535:
+; NoVLX-NEXT: .Lcfi645:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi536:
+; NoVLX-NEXT: .Lcfi646:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi537:
+; NoVLX-NEXT: .Lcfi647:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi538:
+; NoVLX-NEXT: .Lcfi648:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi539:
+; NoVLX-NEXT: .Lcfi649:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi540:
+; NoVLX-NEXT: .Lcfi650:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi541:
+; NoVLX-NEXT: .Lcfi651:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi542:
+; NoVLX-NEXT: .Lcfi652:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi543:
+; NoVLX-NEXT: .Lcfi653:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi544:
+; NoVLX-NEXT: .Lcfi654:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi545:
+; NoVLX-NEXT: .Lcfi655:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi546:
+; NoVLX-NEXT: .Lcfi656:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi547:
+; NoVLX-NEXT: .Lcfi657:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi548:
+; NoVLX-NEXT: .Lcfi658:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi549:
+; NoVLX-NEXT: .Lcfi659:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi550:
+; NoVLX-NEXT: .Lcfi660:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi551:
+; NoVLX-NEXT: .Lcfi661:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi552:
+; NoVLX-NEXT: .Lcfi662:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi553:
+; NoVLX-NEXT: .Lcfi663:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi554:
+; NoVLX-NEXT: .Lcfi664:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi555:
+; NoVLX-NEXT: .Lcfi665:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi556:
+; NoVLX-NEXT: .Lcfi666:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi557:
+; NoVLX-NEXT: .Lcfi667:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi558:
+; NoVLX-NEXT: .Lcfi668:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi559:
+; NoVLX-NEXT: .Lcfi669:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi560:
+; NoVLX-NEXT: .Lcfi670:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi561:
+; NoVLX-NEXT: .Lcfi671:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi562:
+; NoVLX-NEXT: .Lcfi672:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi563:
+; NoVLX-NEXT: .Lcfi673:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi564:
+; NoVLX-NEXT: .Lcfi674:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi565:
+; NoVLX-NEXT: .Lcfi675:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi566:
+; NoVLX-NEXT: .Lcfi676:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi567:
+; NoVLX-NEXT: .Lcfi677:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi568:
+; NoVLX-NEXT: .Lcfi678:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi569:
+; NoVLX-NEXT: .Lcfi679:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi570:
+; NoVLX-NEXT: .Lcfi680:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi571:
+; NoVLX-NEXT: .Lcfi681:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi572:
+; NoVLX-NEXT: .Lcfi682:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi573:
+; NoVLX-NEXT: .Lcfi683:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi574:
+; NoVLX-NEXT: .Lcfi684:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi575:
+; NoVLX-NEXT: .Lcfi685:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi576:
+; NoVLX-NEXT: .Lcfi686:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi577:
+; NoVLX-NEXT: .Lcfi687:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi578:
+; NoVLX-NEXT: .Lcfi688:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi579:
+; NoVLX-NEXT: .Lcfi689:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi580:
+; NoVLX-NEXT: .Lcfi690:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi581:
+; NoVLX-NEXT: .Lcfi691:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi582:
+; NoVLX-NEXT: .Lcfi692:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi583:
+; NoVLX-NEXT: .Lcfi693:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi584:
+; NoVLX-NEXT: .Lcfi694:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi585:
+; NoVLX-NEXT: .Lcfi695:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi586:
+; NoVLX-NEXT: .Lcfi696:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi587:
+; NoVLX-NEXT: .Lcfi697:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi588:
+; NoVLX-NEXT: .Lcfi698:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi589:
+; NoVLX-NEXT: .Lcfi699:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi590:
+; NoVLX-NEXT: .Lcfi700:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi591:
+; NoVLX-NEXT: .Lcfi701:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi592:
+; NoVLX-NEXT: .Lcfi702:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi703:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi704:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi705:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi706:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi707:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi593:
+; NoVLX-NEXT: .Lcfi708:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi594:
+; NoVLX-NEXT: .Lcfi709:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi595:
+; NoVLX-NEXT: .Lcfi710:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi711:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi712:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi713:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi714:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi715:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi596:
+; NoVLX-NEXT: .Lcfi716:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi597:
+; NoVLX-NEXT: .Lcfi717:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi598:
+; NoVLX-NEXT: .Lcfi718:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi719:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi720:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi721:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi722:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi723:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi599:
+; NoVLX-NEXT: .Lcfi724:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi600:
+; NoVLX-NEXT: .Lcfi725:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi601:
+; NoVLX-NEXT: .Lcfi726:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi727:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi728:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi729:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi730:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi731:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
 %0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi602:
+; NoVLX-NEXT: .Lcfi732:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi603:
+; NoVLX-NEXT: .Lcfi733:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi604:
+; NoVLX-NEXT: .Lcfi734:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi735:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi736:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi737:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi738:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi739:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi605:
+; NoVLX-NEXT: .Lcfi740:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi606:
+; NoVLX-NEXT: .Lcfi741:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi607:
+; NoVLX-NEXT: .Lcfi742:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi743:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi744:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi745:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi746:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi747:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi608:
+; NoVLX-NEXT: .Lcfi748:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi609:
+; NoVLX-NEXT: .Lcfi749:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi610:
+; NoVLX-NEXT: .Lcfi750:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi611:
+; NoVLX-NEXT: .Lcfi751:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi612:
+; NoVLX-NEXT: .Lcfi752:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi613:
+; NoVLX-NEXT: .Lcfi753:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi614:
+; NoVLX-NEXT: .Lcfi754:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi615:
+; NoVLX-NEXT: .Lcfi755:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi616:
+; NoVLX-NEXT: .Lcfi756:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi617:
+; NoVLX-NEXT: .Lcfi757:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi618:
+; NoVLX-NEXT: .Lcfi758:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi619:
+; NoVLX-NEXT: .Lcfi759:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi620:
+; NoVLX-NEXT: .Lcfi760:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi621:
+; NoVLX-NEXT: .Lcfi761:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi622:
+; NoVLX-NEXT: .Lcfi762:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi623:
+; NoVLX-NEXT: .Lcfi763:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi624:
+; NoVLX-NEXT: .Lcfi764:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi625:
+; NoVLX-NEXT: .Lcfi765:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi626:
+; NoVLX-NEXT: .Lcfi766:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi627:
+; NoVLX-NEXT: .Lcfi767:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi628:
+; NoVLX-NEXT: .Lcfi768:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi629:
+; NoVLX-NEXT: .Lcfi769:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi630:
+; NoVLX-NEXT: .Lcfi770:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi631:
+; NoVLX-NEXT: .Lcfi771:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi632:
+; NoVLX-NEXT: .Lcfi772:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi633:
+; NoVLX-NEXT: .Lcfi773:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi634:
+; NoVLX-NEXT: .Lcfi774:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi635:
+; NoVLX-NEXT: .Lcfi775:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi636:
+; NoVLX-NEXT: .Lcfi776:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi637:
+; NoVLX-NEXT: .Lcfi777:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi638:
+; NoVLX-NEXT: .Lcfi778:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi639:
+; NoVLX-NEXT: .Lcfi779:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi640:
+; NoVLX-NEXT: .Lcfi780:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi641:
+; NoVLX-NEXT: .Lcfi781:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi642:
+; NoVLX-NEXT: .Lcfi782:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi643:
+; NoVLX-NEXT: .Lcfi783:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi644:
+; NoVLX-NEXT: .Lcfi784:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi645:
+; NoVLX-NEXT: .Lcfi785:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi646:
+; NoVLX-NEXT: .Lcfi786:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi647:
+; NoVLX-NEXT: .Lcfi787:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi648:
+; NoVLX-NEXT: .Lcfi788:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi649:
+; NoVLX-NEXT: .Lcfi789:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi650:
+; NoVLX-NEXT: .Lcfi790:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi651:
+; NoVLX-NEXT: .Lcfi791:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi652:
+; NoVLX-NEXT: .Lcfi792:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi653:
+; NoVLX-NEXT: .Lcfi793:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi654:
+; NoVLX-NEXT: .Lcfi794:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi655:
+; NoVLX-NEXT: .Lcfi795:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi656:
+; NoVLX-NEXT: .Lcfi796:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi657:
+; NoVLX-NEXT: .Lcfi797:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi658:
+; NoVLX-NEXT: .Lcfi798:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi659:
+; NoVLX-NEXT: .Lcfi799:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi660:
+; NoVLX-NEXT: .Lcfi800:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi661:
+; NoVLX-NEXT: .Lcfi801:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi662:
+; NoVLX-NEXT: .Lcfi802:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi663:
+; NoVLX-NEXT: .Lcfi803:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi664:
+; NoVLX-NEXT: .Lcfi804:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi665:
+; NoVLX-NEXT: .Lcfi805:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi666:
+; NoVLX-NEXT: .Lcfi806:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi667:
+; NoVLX-NEXT: .Lcfi807:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi668:
+; NoVLX-NEXT: .Lcfi808:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi669:
+; NoVLX-NEXT: .Lcfi809:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi670:
+; NoVLX-NEXT: .Lcfi810:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi671:
+; NoVLX-NEXT: .Lcfi811:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi672:
+; NoVLX-NEXT: .Lcfi812:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi673:
+; NoVLX-NEXT: .Lcfi813:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi674:
+; NoVLX-NEXT: .Lcfi814:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi675:
+; NoVLX-NEXT: .Lcfi815:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi676:
+; NoVLX-NEXT: .Lcfi816:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi677:
+; NoVLX-NEXT: .Lcfi817:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi678:
+; NoVLX-NEXT: .Lcfi818:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi679:
+; NoVLX-NEXT: .Lcfi819:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi680:
+; NoVLX-NEXT: .Lcfi820:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi681:
+; NoVLX-NEXT: .Lcfi821:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi682:
+; NoVLX-NEXT: .Lcfi822:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi683:
+; NoVLX-NEXT: .Lcfi823:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi684:
+; NoVLX-NEXT: .Lcfi824:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi685:
+; NoVLX-NEXT: .Lcfi825:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi686:
+; NoVLX-NEXT: .Lcfi826:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi687:
+; NoVLX-NEXT: .Lcfi827:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi688:
+; NoVLX-NEXT: .Lcfi828:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi689:
+; NoVLX-NEXT: .Lcfi829:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi690:
+; NoVLX-NEXT: .Lcfi830:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi691:
+; NoVLX-NEXT: .Lcfi831:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi692:
+; NoVLX-NEXT: .Lcfi832:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi693:
+; NoVLX-NEXT: .Lcfi833:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi694:
+; NoVLX-NEXT: .Lcfi834:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi695:
+; NoVLX-NEXT: .Lcfi835:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi696:
+; NoVLX-NEXT: .Lcfi836:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi697:
+; NoVLX-NEXT: .Lcfi837:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi698:
+; NoVLX-NEXT: .Lcfi838:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi699:
+; NoVLX-NEXT: .Lcfi839:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi700:
+; NoVLX-NEXT: .Lcfi840:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi701:
+; NoVLX-NEXT: .Lcfi841:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi702:
+; NoVLX-NEXT: .Lcfi842:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi703:
+; NoVLX-NEXT: .Lcfi843:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi704:
+; NoVLX-NEXT: .Lcfi844:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi705:
+; NoVLX-NEXT: .Lcfi845:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi706:
+; NoVLX-NEXT: .Lcfi846:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi707:
+; NoVLX-NEXT: .Lcfi847:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi708:
+; NoVLX-NEXT: .Lcfi848:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi709:
+; NoVLX-NEXT: .Lcfi849:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi710:
+; NoVLX-NEXT: .Lcfi850:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi711:
+; NoVLX-NEXT: .Lcfi851:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi712:
+; NoVLX-NEXT: .Lcfi852:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi713:
+; NoVLX-NEXT: .Lcfi853:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi714:
+; NoVLX-NEXT: .Lcfi854:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi715:
+; NoVLX-NEXT: .Lcfi855:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi716:
+; NoVLX-NEXT: .Lcfi856:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi717:
+; NoVLX-NEXT: .Lcfi857:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi718:
+; NoVLX-NEXT: .Lcfi858:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi719:
+; NoVLX-NEXT: .Lcfi859:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi720:
+; NoVLX-NEXT: .Lcfi860:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi721:
+; NoVLX-NEXT: .Lcfi861:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi722:
+; NoVLX-NEXT: .Lcfi862:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi723:
+; NoVLX-NEXT: .Lcfi863:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi724:
+; NoVLX-NEXT: .Lcfi864:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi725:
+; NoVLX-NEXT: .Lcfi865:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi726:
+; NoVLX-NEXT: .Lcfi866:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi727:
+; NoVLX-NEXT: .Lcfi867:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi728:
+; NoVLX-NEXT: .Lcfi868:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi729:
+; NoVLX-NEXT: .Lcfi869:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi730:
+; NoVLX-NEXT: .Lcfi870:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi731:
+; NoVLX-NEXT: .Lcfi871:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi732:
+; NoVLX-NEXT: .Lcfi872:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi733:
+; NoVLX-NEXT: .Lcfi873:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi734:
+; NoVLX-NEXT: .Lcfi874:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi735:
+; NoVLX-NEXT: .Lcfi875:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi736:
+; NoVLX-NEXT: .Lcfi876:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi737:
+; NoVLX-NEXT: .Lcfi877:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi738:
+; NoVLX-NEXT: .Lcfi878:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi739:
+; NoVLX-NEXT: .Lcfi879:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi740:
+; NoVLX-NEXT: .Lcfi880:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi741:
+; NoVLX-NEXT: .Lcfi881:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi742:
+; NoVLX-NEXT: .Lcfi882:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi743:
+; NoVLX-NEXT: .Lcfi883:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi744:
+; NoVLX-NEXT: .Lcfi884:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi745:
+; NoVLX-NEXT: .Lcfi885:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi746:
+; NoVLX-NEXT: .Lcfi886:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi747:
+; NoVLX-NEXT: .Lcfi887:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi748:
+; NoVLX-NEXT: .Lcfi888:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi749:
+; NoVLX-NEXT: .Lcfi889:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi750:
+; NoVLX-NEXT: .Lcfi890:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi751:
+; NoVLX-NEXT: .Lcfi891:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
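; NOTE: The IR bodies of these autogenerated functions are elided in this
; excerpt. As a rough hand-written sketch only (the @sketch_* name is
; hypothetical, and the real bodies widen the mask through a <64 x i1>
; shuffle rather than a plain zext), the *_v8i1_v64i1 tests compute
; something equivalent to:
define zeroext i64 @sketch_vpcmpsgtq_v8i1_v64i1(<8 x i64> %__a, <8 x i64> %__b) {
entry:
  ; 8-lane signed compare produces the <8 x i1> mask under test
  %cmp = icmp sgt <8 x i64> %__a, %__b
  ; pack the mask into the low 8 bits and zero-extend into the i64 result
  %mask = bitcast <8 x i1> %cmp to i8
  %ext = zext i8 %mask to i64
  ret i64 %ext
}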
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi752:
+; NoVLX-NEXT: .Lcfi892:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi753:
+; NoVLX-NEXT: .Lcfi893:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi754:
+; NoVLX-NEXT: .Lcfi894:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi755:
+; NoVLX-NEXT: .Lcfi895:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi756:
+; NoVLX-NEXT: .Lcfi896:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi757:
+; NoVLX-NEXT: .Lcfi897:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
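; NOTE: The wholesale .Lcfi renumbering in these hunks (e.g. .Lcfi749 ->
; .Lcfi889) is mechanical: functions earlier in the file now push extra
; callee-saved registers, each push emitting additional CFI labels, which
; shifts every subsequent label number in the file.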
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi758:
+; NoVLX-NEXT: .Lcfi898:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi759:
+; NoVLX-NEXT: .Lcfi899:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi760:
+; NoVLX-NEXT: .Lcfi900:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi761:
+; NoVLX-NEXT: .Lcfi901:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi762:
+; NoVLX-NEXT: .Lcfi902:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi763:
+; NoVLX-NEXT: .Lcfi903:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
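; NOTE: The recurring codegen change in these hunks: the mask-bit extraction
; (kshiftlw/kshiftrw + kmovw) now drains the whole k-register into a wider
; set of GPRs (r8d, r9d, edx, esi, edi, ...) before the byte vector is
; rebuilt with a run of vpinsrb, instead of interleaving each kmovw with a
; vpinsrb through eax/ecx; the kxorw zero-fill stores to the stack are
; likewise hoisted ahead of the extraction sequence.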
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi764:
+; NoVLX-NEXT: .Lcfi904:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi765:
+; NoVLX-NEXT: .Lcfi905:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi766:
+; NoVLX-NEXT: .Lcfi906:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi907:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi908:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi909:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi910:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi911:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
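; NOTE: NoVLX has no byte-sized signed greater-or-equal compare, so the
; blocks above synthesize "a >= b" as NOT(b > a): vpcmpgtb computes b > a,
; vpcmpeqd with identical operands materializes all-ones, and vpxor inverts
; the compare result.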
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi767:
+; NoVLX-NEXT: .Lcfi912:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi768:
+; NoVLX-NEXT: .Lcfi913:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi769:
+; NoVLX-NEXT: .Lcfi914:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi915:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi916:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi917:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi918:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi919:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
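; NOTE: With rbx and r12-r15 now pushed after the frame pointer is set up,
; the epilogue can no longer restore rsp straight from rbp; "leaq -40(%rbp),
; %rsp" points rsp at the five saved registers (5 x 8 bytes below rbp) so
; the pops can find them.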
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi770:
+; NoVLX-NEXT: .Lcfi920:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi771:
+; NoVLX-NEXT: .Lcfi921:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi772:
+; NoVLX-NEXT: .Lcfi922:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi923:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi924:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi925:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi926:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi927:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi773:
+; NoVLX-NEXT: .Lcfi928:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi774:
+; NoVLX-NEXT: .Lcfi929:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi775:
+; NoVLX-NEXT: .Lcfi930:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi931:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi932:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi933:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi934:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi935:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi776:
+; NoVLX-NEXT: .Lcfi936:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi777:
+; NoVLX-NEXT: .Lcfi937:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi778:
+; NoVLX-NEXT: .Lcfi938:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi779:
+; NoVLX-NEXT: .Lcfi939:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi780:
+; NoVLX-NEXT: .Lcfi940:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi781:
+; NoVLX-NEXT: .Lcfi941:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi782:
+; NoVLX-NEXT: .Lcfi942:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi783:
+; NoVLX-NEXT: .Lcfi943:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi784:
+; NoVLX-NEXT: .Lcfi944:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi785:
+; NoVLX-NEXT: .Lcfi945:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi786:
+; NoVLX-NEXT: .Lcfi946:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi787:
+; NoVLX-NEXT: .Lcfi947:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi788:
+; NoVLX-NEXT: .Lcfi948:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi789:
+; NoVLX-NEXT: .Lcfi949:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi790:
+; NoVLX-NEXT: .Lcfi950:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi791:
+; NoVLX-NEXT: .Lcfi951:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi792:
+; NoVLX-NEXT: .Lcfi952:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi793:
+; NoVLX-NEXT: .Lcfi953:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi794:
+; NoVLX-NEXT: .Lcfi954:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi795:
+; NoVLX-NEXT: .Lcfi955:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi796:
+; NoVLX-NEXT: .Lcfi956:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi797:
+; NoVLX-NEXT: .Lcfi957:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi798:
+; NoVLX-NEXT: .Lcfi958:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi799:
+; NoVLX-NEXT: .Lcfi959:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi800:
+; NoVLX-NEXT: .Lcfi960:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi801:
+; NoVLX-NEXT: .Lcfi961:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi802:
+; NoVLX-NEXT: .Lcfi962:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi803:
+; NoVLX-NEXT: .Lcfi963:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi804:
+; NoVLX-NEXT: .Lcfi964:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi805:
+; NoVLX-NEXT: .Lcfi965:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi806:
+; NoVLX-NEXT: .Lcfi966:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi807:
+; NoVLX-NEXT: .Lcfi967:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi808:
+; NoVLX-NEXT: .Lcfi968:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi809:
+; NoVLX-NEXT: .Lcfi969:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi810:
+; NoVLX-NEXT: .Lcfi970:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi811:
+; NoVLX-NEXT: .Lcfi971:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi812:
+; NoVLX-NEXT: .Lcfi972:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi813:
+; NoVLX-NEXT: .Lcfi973:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi814:
+; NoVLX-NEXT: .Lcfi974:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi815:
+; NoVLX-NEXT: .Lcfi975:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi816:
+; NoVLX-NEXT: .Lcfi976:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi817:
+; NoVLX-NEXT: .Lcfi977:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi818:
+; NoVLX-NEXT: .Lcfi978:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi819:
+; NoVLX-NEXT: .Lcfi979:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
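; NOTE: The "# kill" lines above are liveness annotations from the assembly
; printer, not instructions: the zeroext i16 result is returned in AX, a
; subregister of the EAX that kmovw defined, so nothing extra is emitted
; for them.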
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi820:
+; NoVLX-NEXT: .Lcfi980:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi821:
+; NoVLX-NEXT: .Lcfi981:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi822:
+; NoVLX-NEXT: .Lcfi982:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi823:
+; NoVLX-NEXT: .Lcfi983:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi824:
+; NoVLX-NEXT: .Lcfi984:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi825:
+; NoVLX-NEXT: .Lcfi985:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi826:
+; NoVLX-NEXT: .Lcfi986:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi827:
+; NoVLX-NEXT: .Lcfi987:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi828:
+; NoVLX-NEXT: .Lcfi988:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi829:
+; NoVLX-NEXT: .Lcfi989:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi830:
+; NoVLX-NEXT: .Lcfi990:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi831:
+; NoVLX-NEXT: .Lcfi991:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi832:
+; NoVLX-NEXT: .Lcfi992:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi833:
+; NoVLX-NEXT: .Lcfi993:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi834:
+; NoVLX-NEXT: .Lcfi994:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi835:
+; NoVLX-NEXT: .Lcfi995:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi836:
+; NoVLX-NEXT: .Lcfi996:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi837:
+; NoVLX-NEXT: .Lcfi997:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi838:
+; NoVLX-NEXT: .Lcfi998:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi839:
+; NoVLX-NEXT: .Lcfi999:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi840:
+; NoVLX-NEXT: .Lcfi1000:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi841:
+; NoVLX-NEXT: .Lcfi1001:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi842:
+; NoVLX-NEXT: .Lcfi1002:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi843:
+; NoVLX-NEXT: .Lcfi1003:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi844:
+; NoVLX-NEXT: .Lcfi1004:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi845:
+; NoVLX-NEXT: .Lcfi1005:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi846:
+; NoVLX-NEXT: .Lcfi1006:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1007:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1008:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1009:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1010:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1011:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi847:
+; NoVLX-NEXT: .Lcfi1012:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi848:
+; NoVLX-NEXT: .Lcfi1013:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi849:
+; NoVLX-NEXT: .Lcfi1014:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1015:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1016:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1017:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1018:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1019:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi850:
+; NoVLX-NEXT: .Lcfi1020:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi851:
+; NoVLX-NEXT: .Lcfi1021:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi852:
+; NoVLX-NEXT: .Lcfi1022:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1023:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1024:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1025:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1026:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1027:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi853:
+; NoVLX-NEXT: .Lcfi1028:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi854:
+; NoVLX-NEXT: .Lcfi1029:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi855:
+; NoVLX-NEXT: .Lcfi1030:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1031:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1032:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1033:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1034:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1035:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi856:
+; NoVLX-NEXT: .Lcfi1036:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi857:
+; NoVLX-NEXT: .Lcfi1037:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi858:
+; NoVLX-NEXT: .Lcfi1038:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi859:
+; NoVLX-NEXT: .Lcfi1039:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi860:
+; NoVLX-NEXT: .Lcfi1040:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi861:
+; NoVLX-NEXT: .Lcfi1041:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi862:
+; NoVLX-NEXT: .Lcfi1042:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi863:
+; NoVLX-NEXT: .Lcfi1043:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi864:
+; NoVLX-NEXT: .Lcfi1044:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi865:
+; NoVLX-NEXT: .Lcfi1045:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi866:
+; NoVLX-NEXT: .Lcfi1046:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi867:
+; NoVLX-NEXT: .Lcfi1047:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi868:
+; NoVLX-NEXT: .Lcfi1048:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi869:
+; NoVLX-NEXT: .Lcfi1049:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi870:
+; NoVLX-NEXT: .Lcfi1050:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi871:
+; NoVLX-NEXT: .Lcfi1051:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi872:
+; NoVLX-NEXT: .Lcfi1052:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi873:
+; NoVLX-NEXT: .Lcfi1053:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi874:
+; NoVLX-NEXT: .Lcfi1054:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi875:
+; NoVLX-NEXT: .Lcfi1055:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi876:
+; NoVLX-NEXT: .Lcfi1056:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi877:
+; NoVLX-NEXT: .Lcfi1057:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi878:
+; NoVLX-NEXT: .Lcfi1058:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi879:
+; NoVLX-NEXT: .Lcfi1059:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi880:
+; NoVLX-NEXT: .Lcfi1060:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi881:
+; NoVLX-NEXT: .Lcfi1061:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi882:
+; NoVLX-NEXT: .Lcfi1062:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi883:
+; NoVLX-NEXT: .Lcfi1063:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi884:
+; NoVLX-NEXT: .Lcfi1064:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi885:
+; NoVLX-NEXT: .Lcfi1065:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi886:
+; NoVLX-NEXT: .Lcfi1066:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi887:
+; NoVLX-NEXT: .Lcfi1067:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi888:
+; NoVLX-NEXT: .Lcfi1068:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi889:
+; NoVLX-NEXT: .Lcfi1069:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi890:
+; NoVLX-NEXT: .Lcfi1070:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm0
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm7, %xmm4
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi891:
+; NoVLX-NEXT: .Lcfi1071:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi892:
+; NoVLX-NEXT: .Lcfi1072:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi893:
+; NoVLX-NEXT: .Lcfi1073:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm2
-; NoVLX-NEXT: vmovdqa (%rdi), %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi894:
+; NoVLX-NEXT: .Lcfi1074:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi895:
+; NoVLX-NEXT: .Lcfi1075:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi896:
+; NoVLX-NEXT: .Lcfi1076:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm7, %xmm7
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm7, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
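; Note: in the NoVLX lowering (no AVX512BW/VL word-granularity compares), the
; <32 x i16> operands above are rebuilt word by word: vmovq/vpextrq move the
; 64-bit halves into GPRs, shrq peels off the 16-bit fields, and vpinsrw
; reassembles them, while individual mask bits are extracted with
; kshiftlw/kshiftrw + kmovw and reinserted with vpinsrb.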
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi897:
+; NoVLX-NEXT: .Lcfi1077:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi898:
+; NoVLX-NEXT: .Lcfi1078:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi899:
+; NoVLX-NEXT: .Lcfi1079:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm4
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5
+; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3
+; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
+; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm4
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
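; Note: vpternlogd $255 with zeroing masking ({k} {z}) sets all 32 bits of
; each lane selected by the mask register, so the following vpmovdb yields a
; byte-per-lane (0xff/0x00) copy of the 16-bit mask for vpand to apply.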
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
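; Note: vzeroupper before retq clears the upper ymm/zmm state so callers that
; fall back to legacy SSE code do not pay the AVX-to-SSE transition penalty.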
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
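; Note: in this sequence the vpcmpgtd is issued before the k-register
; extraction, leaving the compare result live in xmm0 while the mask nibble is
; assembled in xmm1; vpandn then combines the two without a third temporary.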
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi900:
+; NoVLX-NEXT: .Lcfi1080:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi901:
+; NoVLX-NEXT: .Lcfi1081:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi902:
+; NoVLX-NEXT: .Lcfi1082:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi903:
+; NoVLX-NEXT: .Lcfi1083:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi904:
+; NoVLX-NEXT: .Lcfi1084:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi905:
+; NoVLX-NEXT: .Lcfi1085:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi906:
+; NoVLX-NEXT: .Lcfi1086:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi907:
+; NoVLX-NEXT: .Lcfi1087:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi908:
+; NoVLX-NEXT: .Lcfi1088:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi909:
+; NoVLX-NEXT: .Lcfi1089:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi910:
+; NoVLX-NEXT: .Lcfi1090:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi911:
+; NoVLX-NEXT: .Lcfi1091:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi912:
+; NoVLX-NEXT: .Lcfi1092:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi913:
+; NoVLX-NEXT: .Lcfi1093:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi914:
+; NoVLX-NEXT: .Lcfi1094:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi915:
+; NoVLX-NEXT: .Lcfi1095:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi916:
+; NoVLX-NEXT: .Lcfi1096:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi917:
+; NoVLX-NEXT: .Lcfi1097:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi918:
+; NoVLX-NEXT: .Lcfi1098:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi919:
+; NoVLX-NEXT: .Lcfi1099:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi920:
+; NoVLX-NEXT: .Lcfi1100:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
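; Note: kxorw %k0, %k0, %k0 yields an all-zero mask; storing it to the unused
; word slots of the result area zero-fills the bits of the wide i64 mask that
; the 4-bit compare does not produce.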
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi921:
+; NoVLX-NEXT: .Lcfi1101:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi922:
+; NoVLX-NEXT: .Lcfi1102:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi923:
+; NoVLX-NEXT: .Lcfi1103:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi924:
+; NoVLX-NEXT: .Lcfi1104:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi925:
+; NoVLX-NEXT: .Lcfi1105:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi926:
+; NoVLX-NEXT: .Lcfi1106:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi927:
+; NoVLX-NEXT: .Lcfi1107:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi928:
+; NoVLX-NEXT: .Lcfi1108:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi929:
+; NoVLX-NEXT: .Lcfi1109:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi930:
+; NoVLX-NEXT: .Lcfi1110:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi931:
+; NoVLX-NEXT: .Lcfi1111:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi932:
+; NoVLX-NEXT: .Lcfi1112:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi933:
+; NoVLX-NEXT: .Lcfi1113:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi934:
+; NoVLX-NEXT: .Lcfi1114:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi935:
+; NoVLX-NEXT: .Lcfi1115:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
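; Note: the "# kill:" lines are liveness annotations printed as assembler
; comments (here marking the %AX return value as a sub-register of %EAX);
; they are not instructions.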
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi936:
+; NoVLX-NEXT: .Lcfi1116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi937:
+; NoVLX-NEXT: .Lcfi1117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi938:
+; NoVLX-NEXT: .Lcfi1118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi939:
+; NoVLX-NEXT: .Lcfi1119:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi940:
+; NoVLX-NEXT: .Lcfi1120:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi941:
+; NoVLX-NEXT: .Lcfi1121:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi942:
+; NoVLX-NEXT: .Lcfi1122:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi943:
+; NoVLX-NEXT: .Lcfi1123:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi944:
+; NoVLX-NEXT: .Lcfi1124:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi945:
+; NoVLX-NEXT: .Lcfi1125:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi946:
+; NoVLX-NEXT: .Lcfi1126:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi947:
+; NoVLX-NEXT: .Lcfi1127:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi948:
+; NoVLX-NEXT: .Lcfi1128:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi949:
+; NoVLX-NEXT: .Lcfi1129:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi950:
+; NoVLX-NEXT: .Lcfi1130:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi951:
+; NoVLX-NEXT: .Lcfi1131:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi952:
+; NoVLX-NEXT: .Lcfi1132:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi953:
+; NoVLX-NEXT: .Lcfi1133:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi954:
+; NoVLX-NEXT: .Lcfi1134:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi955:
+; NoVLX-NEXT: .Lcfi1135:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi956:
+; NoVLX-NEXT: .Lcfi1136:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi957:
+; NoVLX-NEXT: .Lcfi1137:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi958:
+; NoVLX-NEXT: .Lcfi1138:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi959:
+; NoVLX-NEXT: .Lcfi1139:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi960:
+; NoVLX-NEXT: .Lcfi1140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi961:
+; NoVLX-NEXT: .Lcfi1141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi962:
+; NoVLX-NEXT: .Lcfi1142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi963:
+; NoVLX-NEXT: .Lcfi1143:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi964:
+; NoVLX-NEXT: .Lcfi1144:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi965:
+; NoVLX-NEXT: .Lcfi1145:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi966:
+; NoVLX-NEXT: .Lcfi1146:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi967:
+; NoVLX-NEXT: .Lcfi1147:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi968:
+; NoVLX-NEXT: .Lcfi1148:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi969:
+; NoVLX-NEXT: .Lcfi1149:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi970:
+; NoVLX-NEXT: .Lcfi1150:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi971:
+; NoVLX-NEXT: .Lcfi1151:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi972:
+; NoVLX-NEXT: .Lcfi1152:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi973:
+; NoVLX-NEXT: .Lcfi1153:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi974:
+; NoVLX-NEXT: .Lcfi1154:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1155:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1156:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1157:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1158:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1159:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi975:
+; NoVLX-NEXT: .Lcfi1160:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi976:
+; NoVLX-NEXT: .Lcfi1161:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi977:
+; NoVLX-NEXT: .Lcfi1162:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1163:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1164:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1165:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1166:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1167:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi978:
+; NoVLX-NEXT: .Lcfi1168:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi979:
+; NoVLX-NEXT: .Lcfi1169:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi980:
+; NoVLX-NEXT: .Lcfi1170:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1171:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1172:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1173:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1174:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1175:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi981:
+; NoVLX-NEXT: .Lcfi1176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi982:
+; NoVLX-NEXT: .Lcfi1177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi983:
+; NoVLX-NEXT: .Lcfi1178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1179:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1180:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1181:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1182:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1183:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi984:
+; NoVLX-NEXT: .Lcfi1184:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi985:
+; NoVLX-NEXT: .Lcfi1185:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi986:
+; NoVLX-NEXT: .Lcfi1186:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1187:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1188:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1189:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1190:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1191:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi987:
+; NoVLX-NEXT: .Lcfi1192:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi988:
+; NoVLX-NEXT: .Lcfi1193:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi989:
+; NoVLX-NEXT: .Lcfi1194:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1195:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1196:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1197:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1198:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1199:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi990:
+; NoVLX-NEXT: .Lcfi1200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi991:
+; NoVLX-NEXT: .Lcfi1201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi992:
+; NoVLX-NEXT: .Lcfi1202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi993:
+; NoVLX-NEXT: .Lcfi1203:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi994:
+; NoVLX-NEXT: .Lcfi1204:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi995:
+; NoVLX-NEXT: .Lcfi1205:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi996:
+; NoVLX-NEXT: .Lcfi1206:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi997:
+; NoVLX-NEXT: .Lcfi1207:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1208:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1209:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1210:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1211:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1212:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1213:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1214:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1215:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <8 x i64> %__a to <16 x i32>
- %1 = bitcast <8 x i64> %__b to <16 x i32>
- %2 = icmp sge <16 x i32> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
- %4 = bitcast <64 x i1> %3 to i64
- ret i64 %4
-}
-
-define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
-; VLX: # BB#0: # %entry
-; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: vzeroupper
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
-; NoVLX: # BB#0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi998:
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi999:
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1000:
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1001:
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1002:
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1003:
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1004:
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1005:
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1006:
+; NoVLX-NEXT: .Lcfi1216:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1007:
+; NoVLX-NEXT: .Lcfi1217:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1008:
+; NoVLX-NEXT: .Lcfi1218:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1009:
+; NoVLX-NEXT: .Lcfi1219:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1010:
+; NoVLX-NEXT: .Lcfi1220:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1011:
+; NoVLX-NEXT: .Lcfi1221:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1012:
+; NoVLX-NEXT: .Lcfi1222:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1013:
+; NoVLX-NEXT: .Lcfi1223:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1014:
+; NoVLX-NEXT: .Lcfi1224:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1015:
+; NoVLX-NEXT: .Lcfi1225:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1016:
+; NoVLX-NEXT: .Lcfi1226:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1017:
+; NoVLX-NEXT: .Lcfi1227:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1018:
+; NoVLX-NEXT: .Lcfi1228:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1019:
+; NoVLX-NEXT: .Lcfi1229:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1020:
+; NoVLX-NEXT: .Lcfi1230:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1021:
+; NoVLX-NEXT: .Lcfi1231:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1022:
+; NoVLX-NEXT: .Lcfi1232:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1023:
+; NoVLX-NEXT: .Lcfi1233:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1024:
+; NoVLX-NEXT: .Lcfi1234:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1025:
+; NoVLX-NEXT: .Lcfi1235:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1026:
+; NoVLX-NEXT: .Lcfi1236:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1027:
+; NoVLX-NEXT: .Lcfi1237:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1028:
+; NoVLX-NEXT: .Lcfi1238:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1029:
+; NoVLX-NEXT: .Lcfi1239:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1030:
+; NoVLX-NEXT: .Lcfi1240:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1031:
+; NoVLX-NEXT: .Lcfi1241:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1032:
+; NoVLX-NEXT: .Lcfi1242:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1033:
+; NoVLX-NEXT: .Lcfi1243:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1034:
+; NoVLX-NEXT: .Lcfi1244:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1035:
+; NoVLX-NEXT: .Lcfi1245:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1036:
+; NoVLX-NEXT: .Lcfi1246:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1037:
+; NoVLX-NEXT: .Lcfi1247:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1038:
+; NoVLX-NEXT: .Lcfi1248:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1039:
+; NoVLX-NEXT: .Lcfi1249:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1040:
+; NoVLX-NEXT: .Lcfi1250:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1041:
+; NoVLX-NEXT: .Lcfi1251:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1042:
+; NoVLX-NEXT: .Lcfi1252:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1043:
+; NoVLX-NEXT: .Lcfi1253:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1044:
+; NoVLX-NEXT: .Lcfi1254:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1045:
+; NoVLX-NEXT: .Lcfi1255:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1046:
+; NoVLX-NEXT: .Lcfi1256:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1047:
+; NoVLX-NEXT: .Lcfi1257:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1048:
+; NoVLX-NEXT: .Lcfi1258:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1049:
+; NoVLX-NEXT: .Lcfi1259:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1050:
+; NoVLX-NEXT: .Lcfi1260:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1051:
+; NoVLX-NEXT: .Lcfi1261:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1052:
+; NoVLX-NEXT: .Lcfi1262:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1053:
+; NoVLX-NEXT: .Lcfi1263:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1054:
+; NoVLX-NEXT: .Lcfi1264:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1055:
+; NoVLX-NEXT: .Lcfi1265:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1056:
+; NoVLX-NEXT: .Lcfi1266:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1057:
+; NoVLX-NEXT: .Lcfi1267:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1058:
+; NoVLX-NEXT: .Lcfi1268:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1059:
+; NoVLX-NEXT: .Lcfi1269:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1060:
+; NoVLX-NEXT: .Lcfi1270:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1061:
+; NoVLX-NEXT: .Lcfi1271:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1062:
+; NoVLX-NEXT: .Lcfi1272:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1063:
+; NoVLX-NEXT: .Lcfi1273:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1064:
+; NoVLX-NEXT: .Lcfi1274:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1065:
+; NoVLX-NEXT: .Lcfi1275:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1066:
+; NoVLX-NEXT: .Lcfi1276:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1067:
+; NoVLX-NEXT: .Lcfi1277:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1068:
+; NoVLX-NEXT: .Lcfi1278:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1069:
+; NoVLX-NEXT: .Lcfi1279:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1070:
+; NoVLX-NEXT: .Lcfi1280:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1071:
+; NoVLX-NEXT: .Lcfi1281:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1072:
+; NoVLX-NEXT: .Lcfi1282:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1073:
+; NoVLX-NEXT: .Lcfi1283:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1074:
+; NoVLX-NEXT: .Lcfi1284:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1075:
+; NoVLX-NEXT: .Lcfi1285:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1076:
+; NoVLX-NEXT: .Lcfi1286:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1077:
+; NoVLX-NEXT: .Lcfi1287:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1078:
+; NoVLX-NEXT: .Lcfi1288:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1079:
+; NoVLX-NEXT: .Lcfi1289:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1080:
+; NoVLX-NEXT: .Lcfi1290:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1081:
+; NoVLX-NEXT: .Lcfi1291:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1082:
+; NoVLX-NEXT: .Lcfi1292:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1083:
+; NoVLX-NEXT: .Lcfi1293:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1084:
+; NoVLX-NEXT: .Lcfi1294:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1085:
+; NoVLX-NEXT: .Lcfi1295:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1086:
+; NoVLX-NEXT: .Lcfi1296:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1087:
+; NoVLX-NEXT: .Lcfi1297:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1088:
+; NoVLX-NEXT: .Lcfi1298:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1089:
+; NoVLX-NEXT: .Lcfi1299:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1090:
+; NoVLX-NEXT: .Lcfi1300:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1091:
+; NoVLX-NEXT: .Lcfi1301:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1092:
+; NoVLX-NEXT: .Lcfi1302:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1093:
+; NoVLX-NEXT: .Lcfi1303:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1094:
+; NoVLX-NEXT: .Lcfi1304:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1095:
+; NoVLX-NEXT: .Lcfi1305:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1096:
+; NoVLX-NEXT: .Lcfi1306:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1097:
+; NoVLX-NEXT: .Lcfi1307:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1098:
+; NoVLX-NEXT: .Lcfi1308:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1099:
+; NoVLX-NEXT: .Lcfi1309:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1100:
+; NoVLX-NEXT: .Lcfi1310:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1101:
+; NoVLX-NEXT: .Lcfi1311:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1102:
+; NoVLX-NEXT: .Lcfi1312:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1103:
+; NoVLX-NEXT: .Lcfi1313:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1104:
+; NoVLX-NEXT: .Lcfi1314:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1105:
+; NoVLX-NEXT: .Lcfi1315:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1106:
+; NoVLX-NEXT: .Lcfi1316:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1107:
+; NoVLX-NEXT: .Lcfi1317:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1108:
+; NoVLX-NEXT: .Lcfi1318:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1109:
+; NoVLX-NEXT: .Lcfi1319:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1110:
+; NoVLX-NEXT: .Lcfi1320:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1111:
+; NoVLX-NEXT: .Lcfi1321:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1112:
+; NoVLX-NEXT: .Lcfi1322:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1113:
+; NoVLX-NEXT: .Lcfi1323:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1114:
+; NoVLX-NEXT: .Lcfi1324:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1115:
+; NoVLX-NEXT: .Lcfi1325:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1116:
+; NoVLX-NEXT: .Lcfi1326:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1117:
+; NoVLX-NEXT: .Lcfi1327:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1118:
+; NoVLX-NEXT: .Lcfi1328:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1119:
+; NoVLX-NEXT: .Lcfi1329:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1120:
+; NoVLX-NEXT: .Lcfi1330:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1121:
+; NoVLX-NEXT: .Lcfi1331:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1122:
+; NoVLX-NEXT: .Lcfi1332:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1123:
+; NoVLX-NEXT: .Lcfi1333:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1124:
+; NoVLX-NEXT: .Lcfi1334:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1125:
+; NoVLX-NEXT: .Lcfi1335:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1126:
+; NoVLX-NEXT: .Lcfi1336:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1127:
+; NoVLX-NEXT: .Lcfi1337:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1128:
+; NoVLX-NEXT: .Lcfi1338:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1129:
+; NoVLX-NEXT: .Lcfi1339:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1130:
+; NoVLX-NEXT: .Lcfi1340:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1131:
+; NoVLX-NEXT: .Lcfi1341:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1132:
+; NoVLX-NEXT: .Lcfi1342:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1133:
+; NoVLX-NEXT: .Lcfi1343:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1134:
+; NoVLX-NEXT: .Lcfi1344:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1135:
+; NoVLX-NEXT: .Lcfi1345:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1136:
+; NoVLX-NEXT: .Lcfi1346:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1137:
+; NoVLX-NEXT: .Lcfi1347:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1138:
+; NoVLX-NEXT: .Lcfi1348:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1139:
+; NoVLX-NEXT: .Lcfi1349:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1140:
+; NoVLX-NEXT: .Lcfi1350:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1141:
+; NoVLX-NEXT: .Lcfi1351:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1142:
+; NoVLX-NEXT: .Lcfi1352:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1143:
+; NoVLX-NEXT: .Lcfi1353:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1144:
+; NoVLX-NEXT: .Lcfi1354:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1145:
+; NoVLX-NEXT: .Lcfi1355:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1146:
+; NoVLX-NEXT: .Lcfi1356:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1147:
+; NoVLX-NEXT: .Lcfi1357:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1148:
+; NoVLX-NEXT: .Lcfi1358:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1359:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1360:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1361:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1362:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1363:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1149:
+; NoVLX-NEXT: .Lcfi1364:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1150:
+; NoVLX-NEXT: .Lcfi1365:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1151:
+; NoVLX-NEXT: .Lcfi1366:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1367:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1368:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1369:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1370:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1371:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1152:
+; NoVLX-NEXT: .Lcfi1372:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1153:
+; NoVLX-NEXT: .Lcfi1373:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1154:
+; NoVLX-NEXT: .Lcfi1374:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1375:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1376:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1377:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1378:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1379:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1155:
+; NoVLX-NEXT: .Lcfi1380:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1156:
+; NoVLX-NEXT: .Lcfi1381:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1157:
+; NoVLX-NEXT: .Lcfi1382:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1383:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1384:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1385:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1386:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1387:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1158:
+; NoVLX-NEXT: .Lcfi1388:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1159:
+; NoVLX-NEXT: .Lcfi1389:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1160:
+; NoVLX-NEXT: .Lcfi1390:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1161:
+; NoVLX-NEXT: .Lcfi1391:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1162:
+; NoVLX-NEXT: .Lcfi1392:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1163:
+; NoVLX-NEXT: .Lcfi1393:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1164:
+; NoVLX-NEXT: .Lcfi1394:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1165:
+; NoVLX-NEXT: .Lcfi1395:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1166:
+; NoVLX-NEXT: .Lcfi1396:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1167:
+; NoVLX-NEXT: .Lcfi1397:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1168:
+; NoVLX-NEXT: .Lcfi1398:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1169:
+; NoVLX-NEXT: .Lcfi1399:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1170:
+; NoVLX-NEXT: .Lcfi1400:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1171:
+; NoVLX-NEXT: .Lcfi1401:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1172:
+; NoVLX-NEXT: .Lcfi1402:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1173:
+; NoVLX-NEXT: .Lcfi1403:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1174:
+; NoVLX-NEXT: .Lcfi1404:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1175:
+; NoVLX-NEXT: .Lcfi1405:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1176:
+; NoVLX-NEXT: .Lcfi1406:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1177:
+; NoVLX-NEXT: .Lcfi1407:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1178:
+; NoVLX-NEXT: .Lcfi1408:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1179:
+; NoVLX-NEXT: .Lcfi1409:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1180:
+; NoVLX-NEXT: .Lcfi1410:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1181:
+; NoVLX-NEXT: .Lcfi1411:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1182:
+; NoVLX-NEXT: .Lcfi1412:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1183:
+; NoVLX-NEXT: .Lcfi1413:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1184:
+; NoVLX-NEXT: .Lcfi1414:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1185:
+; NoVLX-NEXT: .Lcfi1415:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1186:
+; NoVLX-NEXT: .Lcfi1416:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1187:
+; NoVLX-NEXT: .Lcfi1417:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1188:
+; NoVLX-NEXT: .Lcfi1418:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1189:
+; NoVLX-NEXT: .Lcfi1419:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1190:
+; NoVLX-NEXT: .Lcfi1420:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1191:
+; NoVLX-NEXT: .Lcfi1421:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1192:
+; NoVLX-NEXT: .Lcfi1422:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1193:
+; NoVLX-NEXT: .Lcfi1423:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1194:
+; NoVLX-NEXT: .Lcfi1424:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1195:
+; NoVLX-NEXT: .Lcfi1425:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm2
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1196:
+; NoVLX-NEXT: .Lcfi1426:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1197:
+; NoVLX-NEXT: .Lcfi1427:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1198:
+; NoVLX-NEXT: .Lcfi1428:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1199:
+; NoVLX-NEXT: .Lcfi1429:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1200:
+; NoVLX-NEXT: .Lcfi1430:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1201:
+; NoVLX-NEXT: .Lcfi1431:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1202:
+; NoVLX-NEXT: .Lcfi1432:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1203:
+; NoVLX-NEXT: .Lcfi1433:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1204:
+; NoVLX-NEXT: .Lcfi1434:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1205:
+; NoVLX-NEXT: .Lcfi1435:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1206:
+; NoVLX-NEXT: .Lcfi1436:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1207:
+; NoVLX-NEXT: .Lcfi1437:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1208:
+; NoVLX-NEXT: .Lcfi1438:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1209:
+; NoVLX-NEXT: .Lcfi1439:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1210:
+; NoVLX-NEXT: .Lcfi1440:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1211:
+; NoVLX-NEXT: .Lcfi1441:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1212:
+; NoVLX-NEXT: .Lcfi1442:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1213:
+; NoVLX-NEXT: .Lcfi1443:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1214:
+; NoVLX-NEXT: .Lcfi1444:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1215:
+; NoVLX-NEXT: .Lcfi1445:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1216:
+; NoVLX-NEXT: .Lcfi1446:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1217:
+; NoVLX-NEXT: .Lcfi1447:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1218:
+; NoVLX-NEXT: .Lcfi1448:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1219:
+; NoVLX-NEXT: .Lcfi1449:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1220:
+; NoVLX-NEXT: .Lcfi1450:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1221:
+; NoVLX-NEXT: .Lcfi1451:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1222:
+; NoVLX-NEXT: .Lcfi1452:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1223:
+; NoVLX-NEXT: .Lcfi1453:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1224:
+; NoVLX-NEXT: .Lcfi1454:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1225:
+; NoVLX-NEXT: .Lcfi1455:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1226:
+; NoVLX-NEXT: .Lcfi1456:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1227:
+; NoVLX-NEXT: .Lcfi1457:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1228:
+; NoVLX-NEXT: .Lcfi1458:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1459:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1460:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1461:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1462:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1463:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1229:
+; NoVLX-NEXT: .Lcfi1464:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1230:
+; NoVLX-NEXT: .Lcfi1465:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1231:
+; NoVLX-NEXT: .Lcfi1466:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1467:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1468:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1469:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1470:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1471:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1232:
+; NoVLX-NEXT: .Lcfi1472:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1233:
+; NoVLX-NEXT: .Lcfi1473:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1234:
+; NoVLX-NEXT: .Lcfi1474:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1475:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1476:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1477:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1478:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1479:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1235:
+; NoVLX-NEXT: .Lcfi1480:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1236:
+; NoVLX-NEXT: .Lcfi1481:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1237:
+; NoVLX-NEXT: .Lcfi1482:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1483:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1484:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1485:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1486:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1487:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1238:
+; NoVLX-NEXT: .Lcfi1488:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1239:
+; NoVLX-NEXT: .Lcfi1489:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1240:
+; NoVLX-NEXT: .Lcfi1490:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1241:
+; NoVLX-NEXT: .Lcfi1491:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1242:
+; NoVLX-NEXT: .Lcfi1492:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1243:
+; NoVLX-NEXT: .Lcfi1493:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1244:
+; NoVLX-NEXT: .Lcfi1494:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1245:
+; NoVLX-NEXT: .Lcfi1495:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1246:
+; NoVLX-NEXT: .Lcfi1496:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1247:
+; NoVLX-NEXT: .Lcfi1497:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1248:
+; NoVLX-NEXT: .Lcfi1498:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1249:
+; NoVLX-NEXT: .Lcfi1499:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1250:
+; NoVLX-NEXT: .Lcfi1500:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1251:
+; NoVLX-NEXT: .Lcfi1501:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1252:
+; NoVLX-NEXT: .Lcfi1502:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1253:
+; NoVLX-NEXT: .Lcfi1503:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm2
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1254:
+; NoVLX-NEXT: .Lcfi1504:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1255:
+; NoVLX-NEXT: .Lcfi1505:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1256:
+; NoVLX-NEXT: .Lcfi1506:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1257:
+; NoVLX-NEXT: .Lcfi1507:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1258:
+; NoVLX-NEXT: .Lcfi1508:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1259:
+; NoVLX-NEXT: .Lcfi1509:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1260:
+; NoVLX-NEXT: .Lcfi1510:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1261:
+; NoVLX-NEXT: .Lcfi1511:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1262:
+; NoVLX-NEXT: .Lcfi1512:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1263:
+; NoVLX-NEXT: .Lcfi1513:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1264:
+; NoVLX-NEXT: .Lcfi1514:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1265:
+; NoVLX-NEXT: .Lcfi1515:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1266:
+; NoVLX-NEXT: .Lcfi1516:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1267:
+; NoVLX-NEXT: .Lcfi1517:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1268:
+; NoVLX-NEXT: .Lcfi1518:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1269:
+; NoVLX-NEXT: .Lcfi1519:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm2
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1270:
+; NoVLX-NEXT: .Lcfi1520:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1271:
+; NoVLX-NEXT: .Lcfi1521:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1272:
+; NoVLX-NEXT: .Lcfi1522:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm9
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rax
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rcx
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm1, %ymm4, %ymm3
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm3
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm0
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm1, %ymm3, %ymm2
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
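; NOTE: editorial sketch, not generated by update_llc_test_checks.py. Without
; VLX there is no unsigned 256-bit word compare, so the NoVLX blocks above
; bias both operands by the sign bit (the vpxor with the 32768 = 0x8000 splat)
; and fall back to the signed vpcmpgtw, using the identity
;   a <u b  <=>  (a ^ 0x8000) <s (b ^ 0x8000)
; e.g. 0 <u 0xFFFF becomes -32768 <s 32767 after biasing, which still holds.
; A minimal IR shape (hypothetical, not part of this test) that triggers this
; lowering would be:
;   %c = icmp ult <32 x i16> %a, %b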
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1273:
+; NoVLX-NEXT: .Lcfi1523:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1274:
+; NoVLX-NEXT: .Lcfi1524:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1275:
+; NoVLX-NEXT: .Lcfi1525:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %edx, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $6, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
+; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm0 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm0, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor 32(%rdi), %ymm0, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm0, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor (%rdi), %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
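; NOTE: editorial comment. The new epilogues add vzeroupper before retq: these
; functions dirty the upper bits of the ymm/zmm registers, and clearing that
; state on return avoids the AVX-to-SSE transition penalty in callers that
; still use non-VEX SSE code.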
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1276:
+; NoVLX-NEXT: .Lcfi1526:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1277:
+; NoVLX-NEXT: .Lcfi1527:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1278:
+; NoVLX-NEXT: .Lcfi1528:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm9
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm6
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm7, %xmm7
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm6
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm2
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm4
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm3
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm4, %ymm5
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm5
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm5, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm5, %ymm6, %ymm3
+; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
+; NoVLX-NEXT: vpxor %ymm5, %ymm8, %ymm2
+; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpand %xmm1, %xmm2, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
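; NOTE: editorial comment on the masked variants above. The two halves of the
; incoming 32-bit writemask are reloaded with kmovw {{[0-9]+}}(%rsp), %k1/%k2,
; expanded to vector form with vpternlogd $255 (all-ones in the k-selected
; dword lanes, zeroed elsewhere by {z}), narrowed to bytes with vpmovdb, and
; then applied to the compare results with vpand before the final vptestmd
; turns the bytes back into a mask.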
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1279:
+; NoVLX-NEXT: .Lcfi1529:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1280:
+; NoVLX-NEXT: .Lcfi1530:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1281:
+; NoVLX-NEXT: .Lcfi1531:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm6
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm1
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm4, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm5
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm5, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor 32(%rsi), %ymm3, %ymm4
-; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor 32(%rsi), %ymm4, %ymm4
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
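; NOTE: editorial comment. The long kshiftlw/kshiftrw ladders above read one
; mask bit at a time: shifting %k0 left by (15 - i) parks bit i in position
; 15, and the following kshiftrw $15 brings it down to bit 0, so kmovw yields
; exactly 0 or 1. For example, kshiftlw $14 then kshiftrw $15 extracts bit 1
; of %k0.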
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
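; NOTE: editorial comment. The vpcmpultd tests use the same signed-compare
; rewrite as the word forms, with the dword sign bit instead: 2147483648 is
; 0x80000000, broadcast by vpbroadcastd, giving
;   a <u b  <=>  (a ^ 0x80000000) <s (b ^ 0x80000000)
; so the test reduces to a vpcmpgtd on the biased operands.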
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
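; NOTE: editorial comment, hedged reading of the shuffle above. The vpermi2q
; with the index vector [0,1,8,3,4,5,6,7] appears to splice one extracted mask
; bit into the result: indices below 8 keep elements of the first table
; (%zmm2), and index 8 selects element 0 of the second table (%zmm1), i.e. the
; lane materialized from the vpextrb bit, before vpsllq/vptestmq convert the
; vector back into a k register. The dword variants below use
; [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] with vpermi2d the same way.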
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
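; A note on the idiom above: "vpternlogq $255, ..., {%k1} {z}" materializes
; all-ones in the lanes selected by %k1 and zeros elsewhere (ternary truth
; table 0xFF is constant-true), and vpermi2q with an index vector such as
; [0,1,8,3,4,5,6,7] then splices one lane of that freshly built value into
; the accumulated mask vector. Moving "vpextrb $8" after the first
; vpternlogq, as these hunks do, is purely a scheduling change: the
; extract reads only %xmm0 and %eax, which the vpternlogq does not touch.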
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
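; The vpxor/vpcmpgtd pairs throughout these hunks are how "icmp ult" is
; lowered without a native unsigned vector compare: xoring both operands
; with the splatted constant 2147483648 (0x80000000) flips their sign
; bits, after which the signed "vpcmpgtd %xmm0, %xmm1" computes b >s a on
; the biased values, i.e. a <u b on the originals. A minimal IR sketch of
; the comparison being exercised, with the surrounding mask-extension
; boilerplate stripped (the function name here is hypothetical, not part
; of this test):
;
;   define <4 x i1> @ult_sketch(<4 x i32> %a, <4 x i32> %b) {
;     %cmp = icmp ult <4 x i32> %a, %b
;     ret <4 x i1> %cmp
;   }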
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
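; The .Lcfi label renumbering in the hunks that follow (.Lcfi1282 ->
; .Lcfi1532 and so on) is mechanical: regenerating the assertions after
; earlier functions grew extra CFI directives shifts every subsequent
; label index, and the offset grows again wherever a later function gains
; additional .cfi_offset entries of its own.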
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1282:
+; NoVLX-NEXT: .Lcfi1532:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1283:
+; NoVLX-NEXT: .Lcfi1533:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1284:
+; NoVLX-NEXT: .Lcfi1534:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1285:
+; NoVLX-NEXT: .Lcfi1535:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1286:
+; NoVLX-NEXT: .Lcfi1536:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1287:
+; NoVLX-NEXT: .Lcfi1537:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1288:
+; NoVLX-NEXT: .Lcfi1538:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1289:
+; NoVLX-NEXT: .Lcfi1539:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1290:
+; NoVLX-NEXT: .Lcfi1540:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1291:
+; NoVLX-NEXT: .Lcfi1541:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1292:
+; NoVLX-NEXT: .Lcfi1542:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1293:
+; NoVLX-NEXT: .Lcfi1543:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1294:
+; NoVLX-NEXT: .Lcfi1544:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1295:
+; NoVLX-NEXT: .Lcfi1545:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1296:
+; NoVLX-NEXT: .Lcfi1546:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1297:
+; NoVLX-NEXT: .Lcfi1547:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1298:
+; NoVLX-NEXT: .Lcfi1548:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1299:
+; NoVLX-NEXT: .Lcfi1549:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1300:
+; NoVLX-NEXT: .Lcfi1550:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1301:
+; NoVLX-NEXT: .Lcfi1551:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1302:
+; NoVLX-NEXT: .Lcfi1552:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1303:
+; NoVLX-NEXT: .Lcfi1553:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1304:
+; NoVLX-NEXT: .Lcfi1554:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1305:
+; NoVLX-NEXT: .Lcfi1555:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1306:
+; NoVLX-NEXT: .Lcfi1556:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1307:
+; NoVLX-NEXT: .Lcfi1557:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1308:
+; NoVLX-NEXT: .Lcfi1558:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1309:
+; NoVLX-NEXT: .Lcfi1559:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1310:
+; NoVLX-NEXT: .Lcfi1560:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1311:
+; NoVLX-NEXT: .Lcfi1561:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1312:
+; NoVLX-NEXT: .Lcfi1562:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1313:
+; NoVLX-NEXT: .Lcfi1563:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1314:
+; NoVLX-NEXT: .Lcfi1564:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1315:
+; NoVLX-NEXT: .Lcfi1565:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1316:
+; NoVLX-NEXT: .Lcfi1566:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1317:
+; NoVLX-NEXT: .Lcfi1567:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1318:
+; NoVLX-NEXT: .Lcfi1568:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1319:
+; NoVLX-NEXT: .Lcfi1569:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1320:
+; NoVLX-NEXT: .Lcfi1570:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1321:
+; NoVLX-NEXT: .Lcfi1571:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1322:
+; NoVLX-NEXT: .Lcfi1572:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1323:
+; NoVLX-NEXT: .Lcfi1573:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1324:
+; NoVLX-NEXT: .Lcfi1574:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1325:
+; NoVLX-NEXT: .Lcfi1575:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1326:
+; NoVLX-NEXT: .Lcfi1576:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1327:
+; NoVLX-NEXT: .Lcfi1577:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1328:
+; NoVLX-NEXT: .Lcfi1578:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1329:
+; NoVLX-NEXT: .Lcfi1579:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1330:
+; NoVLX-NEXT: .Lcfi1580:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1331:
+; NoVLX-NEXT: .Lcfi1581:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1332:
+; NoVLX-NEXT: .Lcfi1582:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1333:
+; NoVLX-NEXT: .Lcfi1583:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1334:
+; NoVLX-NEXT: .Lcfi1584:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1335:
+; NoVLX-NEXT: .Lcfi1585:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1336:
+; NoVLX-NEXT: .Lcfi1586:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1337:
+; NoVLX-NEXT: .Lcfi1587:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1338:
+; NoVLX-NEXT: .Lcfi1588:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
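; The NoVLX lowering above materializes the narrow mask one bit at a
; time: for bit i of %k0, "kshiftlw $(15 - i)" moves it to bit 15,
; "kshiftrw $15" brings it down to bit 0, and kmovw copies the 0/1 value
; to a GPR. In scalar terms (a sketch, not code from this test):
;
;   bit_i = (k0 << (15 - i)) >> 15   ; for a 16-bit k register
;
; The vpinsrb chain then rebuilds those bits as a byte vector, which is
; sign-extended and retested (vpmovsxbd + vpslld + vptestmd) into the
; widened mask the function returns. The regenerated code also extracts
; several bits into GPRs (%r8d, %r9d, %edx, %esi, %edi) before issuing
; any vpinsrb, and moves the "kxorw %k0, %k0, %k1" zeroing and its stack
; spills to the top; both are equivalent reorderings, since the zero
; mask now lives in %k1 and can be spilled before %k0 is picked apart.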
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1339:
+; NoVLX-NEXT: .Lcfi1589:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1340:
+; NoVLX-NEXT: .Lcfi1590:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1341:
+; NoVLX-NEXT: .Lcfi1591:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1342:
+; NoVLX-NEXT: .Lcfi1592:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1343:
+; NoVLX-NEXT: .Lcfi1593:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1344:
+; NoVLX-NEXT: .Lcfi1594:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1345:
+; NoVLX-NEXT: .Lcfi1595:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1346:
+; NoVLX-NEXT: .Lcfi1596:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1347:
+; NoVLX-NEXT: .Lcfi1597:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1348:
+; NoVLX-NEXT: .Lcfi1598:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1349:
+; NoVLX-NEXT: .Lcfi1599:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1350:
+; NoVLX-NEXT: .Lcfi1600:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1351:
+; NoVLX-NEXT: .Lcfi1601:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1352:
+; NoVLX-NEXT: .Lcfi1602:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1353:
+; NoVLX-NEXT: .Lcfi1603:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1354:
+; NoVLX-NEXT: .Lcfi1604:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1355:
+; NoVLX-NEXT: .Lcfi1605:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1356:
+; NoVLX-NEXT: .Lcfi1606:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1607:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1608:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1609:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1610:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1611:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
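; In these v16i1 variants the regenerated code extracts all sixteen mask
; bits into GPRs before any vpinsrb is issued, so the sequence now needs
; %rbx and %r12-%r15 as scratch registers. That is why the diff adds the
; five callee-saved push/pop pairs, the matching .cfi_offset entries, and
; the "leaq -40(%rbp), %rsp" epilogue, which rewinds the stack pointer
; past the 40 bytes of saved registers before popping them.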
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1357:
+; NoVLX-NEXT: .Lcfi1612:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1358:
+; NoVLX-NEXT: .Lcfi1613:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1359:
+; NoVLX-NEXT: .Lcfi1614:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1615:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1616:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1617:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1618:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1619:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
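; The IR bodies of these functions are elided in this excerpt. As a minimal
; sketch of the shape the assertions above cover (a hypothetical reduction;
; the @sketch_* name and value names are illustrative, not from this test):
; compare two <16 x i32> vectors with icmp ult, widen the <16 x i1> result
; to <32 x i1> by shuffling in zeros, and return it as an i32. The kxorw /
; kmovw spill in the checks zeroes the upper 16 bits of that result.
define zeroext i32 @sketch_ult_v16i1_to_v32i1(<8 x i64> %a, <8 x i64>* %b) {
entry:
  ; Reinterpret both 512-bit arguments as sixteen i32 lanes.
  %va = bitcast <8 x i64> %a to <16 x i32>
  %ld = load <8 x i64>, <8 x i64>* %b
  %vb = bitcast <8 x i64> %ld to <16 x i32>
  ; Unsigned compare; the load folds into vpcmpltud (%rdi), %zmm0, %k0.
  %cmp = icmp ult <16 x i32> %va, %vb
  ; Concatenate sixteen zero bits to form the <32 x i1> result.
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %res = bitcast <32 x i1> %wide to i32
  ret i32 %res
}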
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1360:
+; NoVLX-NEXT: .Lcfi1620:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1361:
+; NoVLX-NEXT: .Lcfi1621:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1362:
+; NoVLX-NEXT: .Lcfi1622:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1623:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1624:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1625:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1626:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1627:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
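; In the masked variants the incoming i16 mask (%edi above) is bitcast to
; <16 x i1> and ANDed with the compare; llc expresses that AND as the {%k1}
; write-mask on vpcmpltud after kmovw %edi, %k1. A hypothetical reduction
; (names illustrative; the widening shuffle from the sketch above is
; omitted for brevity):
define zeroext i16 @sketch_masked_ult(i16 zeroext %u, <8 x i64> %a, <8 x i64> %b) {
entry:
  %va = bitcast <8 x i64> %a to <16 x i32>
  %vb = bitcast <8 x i64> %b to <16 x i32>
  %cmp = icmp ult <16 x i32> %va, %vb
  ; The scalar mask argument becomes a k-register predicate.
  %m = bitcast i16 %u to <16 x i1>
  %and = and <16 x i1> %cmp, %m
  %res = bitcast <16 x i1> %and to i16
  ret i16 %res
}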
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1363:
+; NoVLX-NEXT: .Lcfi1628:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1364:
+; NoVLX-NEXT: .Lcfi1629:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1365:
+; NoVLX-NEXT: .Lcfi1630:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1631:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1632:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1633:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1634:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1635:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1366:
+; NoVLX-NEXT: .Lcfi1636:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1367:
+; NoVLX-NEXT: .Lcfi1637:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1368:
+; NoVLX-NEXT: .Lcfi1638:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1639:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1640:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1641:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1642:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1643:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
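; The _mem_b variants compare against a single i32 splatted from memory;
; the splat folds into the embedded-broadcast operand (%rdi){1to16} in the
; checks above. A hypothetical reduction of that pattern (names
; illustrative, result narrowed to i16 for brevity):
define zeroext i16 @sketch_ult_splat(<8 x i64> %a, i32* %b) {
entry:
  %va = bitcast <8 x i64> %a to <16 x i32>
  %s = load i32, i32* %b
  ; Build the splat; AVX-512 folds it into the compare's memory operand.
  %ins = insertelement <16 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
  %cmp = icmp ult <16 x i32> %va, %splat
  %res = bitcast <16 x i1> %cmp to i16
  ret i16 %res
}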
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1369:
+; NoVLX-NEXT: .Lcfi1644:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1370:
+; NoVLX-NEXT: .Lcfi1645:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1371:
+; NoVLX-NEXT: .Lcfi1646:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1647:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1648:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1649:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1650:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1651:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1372:
+; NoVLX-NEXT: .Lcfi1652:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1373:
+; NoVLX-NEXT: .Lcfi1653:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1374:
+; NoVLX-NEXT: .Lcfi1654:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1375:
+; NoVLX-NEXT: .Lcfi1655:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1376:
+; NoVLX-NEXT: .Lcfi1656:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1377:
+; NoVLX-NEXT: .Lcfi1657:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1378:
+; NoVLX-NEXT: .Lcfi1658:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1379:
+; NoVLX-NEXT: .Lcfi1659:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1380:
+; NoVLX-NEXT: .Lcfi1660:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1381:
+; NoVLX-NEXT: .Lcfi1661:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1382:
+; NoVLX-NEXT: .Lcfi1662:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1383:
+; NoVLX-NEXT: .Lcfi1663:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1384:
+; NoVLX-NEXT: .Lcfi1664:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1385:
+; NoVLX-NEXT: .Lcfi1665:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1386:
+; NoVLX-NEXT: .Lcfi1666:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1387:
+; NoVLX-NEXT: .Lcfi1667:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1388:
+; NoVLX-NEXT: .Lcfi1668:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1389:
+; NoVLX-NEXT: .Lcfi1669:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1390:
+; NoVLX-NEXT: .Lcfi1670:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1391:
+; NoVLX-NEXT: .Lcfi1671:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1392:
+; NoVLX-NEXT: .Lcfi1672:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1393:
+; NoVLX-NEXT: .Lcfi1673:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1394:
+; NoVLX-NEXT: .Lcfi1674:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1395:
+; NoVLX-NEXT: .Lcfi1675:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1396:
+; NoVLX-NEXT: .Lcfi1676:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1397:
+; NoVLX-NEXT: .Lcfi1677:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1398:
+; NoVLX-NEXT: .Lcfi1678:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1399:
+; NoVLX-NEXT: .Lcfi1679:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1400:
+; NoVLX-NEXT: .Lcfi1680:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1401:
+; NoVLX-NEXT: .Lcfi1681:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1402:
+; NoVLX-NEXT: .Lcfi1682:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1403:
+; NoVLX-NEXT: .Lcfi1683:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1404:
+; NoVLX-NEXT: .Lcfi1684:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1405:
+; NoVLX-NEXT: .Lcfi1685:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1406:
+; NoVLX-NEXT: .Lcfi1686:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1407:
+; NoVLX-NEXT: .Lcfi1687:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1408:
+; NoVLX-NEXT: .Lcfi1688:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1409:
+; NoVLX-NEXT: .Lcfi1689:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1410:
+; NoVLX-NEXT: .Lcfi1690:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1411:
+; NoVLX-NEXT: .Lcfi1691:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1412:
+; NoVLX-NEXT: .Lcfi1692:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1413:
+; NoVLX-NEXT: .Lcfi1693:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1414:
+; NoVLX-NEXT: .Lcfi1694:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1415:
+; NoVLX-NEXT: .Lcfi1695:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1416:
+; NoVLX-NEXT: .Lcfi1696:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1417:
+; NoVLX-NEXT: .Lcfi1697:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1418:
+; NoVLX-NEXT: .Lcfi1698:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1419:
+; NoVLX-NEXT: .Lcfi1699:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
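; SSE/AVX have no unsigned integer compare, so the v2i64 ult tests above are
; lowered by flipping sign bits: vpxor both operands with the splat of 2^63
; (9223372036854775808), then do a signed vpcmpgtq, since
; (x ^ 2^63) <s (y ^ 2^63) iff x <u y. A hypothetical reduction that
; produces exactly that vmovdqa/vpxor/vpcmpgtq sequence (name illustrative):
define <2 x i64> @sketch_ult_via_sign_flip(<2 x i64> %x, <2 x i64> %y) {
entry:
  %cmp = icmp ult <2 x i64> %x, %y
  ; sext materializes the all-ones / all-zeros lanes vpcmpgtq produces.
  %res = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %res
}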
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1420:
+; NoVLX-NEXT: .Lcfi1700:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1421:
+; NoVLX-NEXT: .Lcfi1701:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1422:
+; NoVLX-NEXT: .Lcfi1702:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1423:
+; NoVLX-NEXT: .Lcfi1703:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1424:
+; NoVLX-NEXT: .Lcfi1704:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1425:
+; NoVLX-NEXT: .Lcfi1705:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm2
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1426:
+; NoVLX-NEXT: .Lcfi1706:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1427:
+; NoVLX-NEXT: .Lcfi1707:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1428:
+; NoVLX-NEXT: .Lcfi1708:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1429:
+; NoVLX-NEXT: .Lcfi1709:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1430:
+; NoVLX-NEXT: .Lcfi1710:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1431:
+; NoVLX-NEXT: .Lcfi1711:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1432:
+; NoVLX-NEXT: .Lcfi1712:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1433:
+; NoVLX-NEXT: .Lcfi1713:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1434:
+; NoVLX-NEXT: .Lcfi1714:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1435:
+; NoVLX-NEXT: .Lcfi1715:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1436:
+; NoVLX-NEXT: .Lcfi1716:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1437:
+; NoVLX-NEXT: .Lcfi1717:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1438:
+; NoVLX-NEXT: .Lcfi1718:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1439:
+; NoVLX-NEXT: .Lcfi1719:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1440:
+; NoVLX-NEXT: .Lcfi1720:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1441:
+; NoVLX-NEXT: .Lcfi1721:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1442:
+; NoVLX-NEXT: .Lcfi1722:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1443:
+; NoVLX-NEXT: .Lcfi1723:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1444:
+; NoVLX-NEXT: .Lcfi1724:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1445:
+; NoVLX-NEXT: .Lcfi1725:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1446:
+; NoVLX-NEXT: .Lcfi1726:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1447:
+; NoVLX-NEXT: .Lcfi1727:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1448:
+; NoVLX-NEXT: .Lcfi1728:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1449:
+; NoVLX-NEXT: .Lcfi1729:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1450:
+; NoVLX-NEXT: .Lcfi1730:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1451:
+; NoVLX-NEXT: .Lcfi1731:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1452:
+; NoVLX-NEXT: .Lcfi1732:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1453:
+; NoVLX-NEXT: .Lcfi1733:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1454:
+; NoVLX-NEXT: .Lcfi1734:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1455:
+; NoVLX-NEXT: .Lcfi1735:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm2
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm2
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1456:
+; NoVLX-NEXT: .Lcfi1736:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1457:
+; NoVLX-NEXT: .Lcfi1737:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1458:
+; NoVLX-NEXT: .Lcfi1738:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1459:
+; NoVLX-NEXT: .Lcfi1739:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1460:
+; NoVLX-NEXT: .Lcfi1740:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1461:
+; NoVLX-NEXT: .Lcfi1741:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1462:
+; NoVLX-NEXT: .Lcfi1742:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1463:
+; NoVLX-NEXT: .Lcfi1743:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1464:
+; NoVLX-NEXT: .Lcfi1744:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1465:
+; NoVLX-NEXT: .Lcfi1745:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1466:
+; NoVLX-NEXT: .Lcfi1746:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1467:
+; NoVLX-NEXT: .Lcfi1747:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm2
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1468:
+; NoVLX-NEXT: .Lcfi1748:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1469:
+; NoVLX-NEXT: .Lcfi1749:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1470:
+; NoVLX-NEXT: .Lcfi1750:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1471:
+; NoVLX-NEXT: .Lcfi1751:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1472:
+; NoVLX-NEXT: .Lcfi1752:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1473:
+; NoVLX-NEXT: .Lcfi1753:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1474:
+; NoVLX-NEXT: .Lcfi1754:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1475:
+; NoVLX-NEXT: .Lcfi1755:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1476:
+; NoVLX-NEXT: .Lcfi1756:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1477:
+; NoVLX-NEXT: .Lcfi1757:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1478:
+; NoVLX-NEXT: .Lcfi1758:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1479:
+; NoVLX-NEXT: .Lcfi1759:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1480:
+; NoVLX-NEXT: .Lcfi1760:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1481:
+; NoVLX-NEXT: .Lcfi1761:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1482:
+; NoVLX-NEXT: .Lcfi1762:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1483:
+; NoVLX-NEXT: .Lcfi1763:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1484:
+; NoVLX-NEXT: .Lcfi1764:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1485:
+; NoVLX-NEXT: .Lcfi1765:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1486:
+; NoVLX-NEXT: .Lcfi1766:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1487:
+; NoVLX-NEXT: .Lcfi1767:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1488:
+; NoVLX-NEXT: .Lcfi1768:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1489:
+; NoVLX-NEXT: .Lcfi1769:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1490:
+; NoVLX-NEXT: .Lcfi1770:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1491:
+; NoVLX-NEXT: .Lcfi1771:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1492:
+; NoVLX-NEXT: .Lcfi1772:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1493:
+; NoVLX-NEXT: .Lcfi1773:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1494:
+; NoVLX-NEXT: .Lcfi1774:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1495:
+; NoVLX-NEXT: .Lcfi1775:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1496:
+; NoVLX-NEXT: .Lcfi1776:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1497:
+; NoVLX-NEXT: .Lcfi1777:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1498:
+; NoVLX-NEXT: .Lcfi1778:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1499:
+; NoVLX-NEXT: .Lcfi1779:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1500:
+; NoVLX-NEXT: .Lcfi1780:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1501:
+; NoVLX-NEXT: .Lcfi1781:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1502:
+; NoVLX-NEXT: .Lcfi1782:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1503:
+; NoVLX-NEXT: .Lcfi1783:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1504:
+; NoVLX-NEXT: .Lcfi1784:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1505:
+; NoVLX-NEXT: .Lcfi1785:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1506:
+; NoVLX-NEXT: .Lcfi1786:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1507:
+; NoVLX-NEXT: .Lcfi1787:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1508:
+; NoVLX-NEXT: .Lcfi1788:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1509:
+; NoVLX-NEXT: .Lcfi1789:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1510:
+; NoVLX-NEXT: .Lcfi1790:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1511:
+; NoVLX-NEXT: .Lcfi1791:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1512:
+; NoVLX-NEXT: .Lcfi1792:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1513:
+; NoVLX-NEXT: .Lcfi1793:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1514:
+; NoVLX-NEXT: .Lcfi1794:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1515:
+; NoVLX-NEXT: .Lcfi1795:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1516:
+; NoVLX-NEXT: .Lcfi1796:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1517:
+; NoVLX-NEXT: .Lcfi1797:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1518:
+; NoVLX-NEXT: .Lcfi1798:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1519:
+; NoVLX-NEXT: .Lcfi1799:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1520:
+; NoVLX-NEXT: .Lcfi1800:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1521:
+; NoVLX-NEXT: .Lcfi1801:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1522:
+; NoVLX-NEXT: .Lcfi1802:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1523:
+; NoVLX-NEXT: .Lcfi1803:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1524:
+; NoVLX-NEXT: .Lcfi1804:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1525:
+; NoVLX-NEXT: .Lcfi1805:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1526:
+; NoVLX-NEXT: .Lcfi1806:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1527:
+; NoVLX-NEXT: .Lcfi1807:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
+; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
+; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1528:
+; NoVLX-NEXT: .Lcfi1808:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1529:
+; NoVLX-NEXT: .Lcfi1809:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1530:
+; NoVLX-NEXT: .Lcfi1810:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1531:
+; NoVLX-NEXT: .Lcfi1811:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1532:
+; NoVLX-NEXT: .Lcfi1812:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1533:
+; NoVLX-NEXT: .Lcfi1813:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1534:
+; NoVLX-NEXT: .Lcfi1814:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1535:
+; NoVLX-NEXT: .Lcfi1815:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1536:
+; NoVLX-NEXT: .Lcfi1816:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1537:
+; NoVLX-NEXT: .Lcfi1817:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1538:
+; NoVLX-NEXT: .Lcfi1818:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1539:
+; NoVLX-NEXT: .Lcfi1819:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1540:
+; NoVLX-NEXT: .Lcfi1820:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1541:
+; NoVLX-NEXT: .Lcfi1821:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1542:
+; NoVLX-NEXT: .Lcfi1822:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
+; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1543:
+; NoVLX-NEXT: .Lcfi1823:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1544:
+; NoVLX-NEXT: .Lcfi1824:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1545:
+; NoVLX-NEXT: .Lcfi1825:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1546:
+; NoVLX-NEXT: .Lcfi1826:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1547:
+; NoVLX-NEXT: .Lcfi1827:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1548:
+; NoVLX-NEXT: .Lcfi1828:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1549:
+; NoVLX-NEXT: .Lcfi1829:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1550:
+; NoVLX-NEXT: .Lcfi1830:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1551:
+; NoVLX-NEXT: .Lcfi1831:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1552:
+; NoVLX-NEXT: .Lcfi1832:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1553:
+; NoVLX-NEXT: .Lcfi1833:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1554:
+; NoVLX-NEXT: .Lcfi1834:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1555:
+; NoVLX-NEXT: .Lcfi1835:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1556:
+; NoVLX-NEXT: .Lcfi1836:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1557:
+; NoVLX-NEXT: .Lcfi1837:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1558:
+; NoVLX-NEXT: .Lcfi1838:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1559:
+; NoVLX-NEXT: .Lcfi1839:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1560:
+; NoVLX-NEXT: .Lcfi1840:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1561:
+; NoVLX-NEXT: .Lcfi1841:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1562:
+; NoVLX-NEXT: .Lcfi1842:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1563:
+; NoVLX-NEXT: .Lcfi1843:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1564:
+; NoVLX-NEXT: .Lcfi1844:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1565:
+; NoVLX-NEXT: .Lcfi1845:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1566:
+; NoVLX-NEXT: .Lcfi1846:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1567:
+; NoVLX-NEXT: .Lcfi1847:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1568:
+; NoVLX-NEXT: .Lcfi1848:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1569:
+; NoVLX-NEXT: .Lcfi1849:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1570:
+; NoVLX-NEXT: .Lcfi1850:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1571:
+; NoVLX-NEXT: .Lcfi1851:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1572:
+; NoVLX-NEXT: .Lcfi1852:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1573:
+; NoVLX-NEXT: .Lcfi1853:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1574:
+; NoVLX-NEXT: .Lcfi1854:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1575:
+; NoVLX-NEXT: .Lcfi1855:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1576:
+; NoVLX-NEXT: .Lcfi1856:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1577:
+; NoVLX-NEXT: .Lcfi1857:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1578:
+; NoVLX-NEXT: .Lcfi1858:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1579:
+; NoVLX-NEXT: .Lcfi1859:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1580:
+; NoVLX-NEXT: .Lcfi1860:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1581:
+; NoVLX-NEXT: .Lcfi1861:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1582:
+; NoVLX-NEXT: .Lcfi1862:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1583:
+; NoVLX-NEXT: .Lcfi1863:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1584:
+; NoVLX-NEXT: .Lcfi1864:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1585:
+; NoVLX-NEXT: .Lcfi1865:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1586:
+; NoVLX-NEXT: .Lcfi1866:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1587:
+; NoVLX-NEXT: .Lcfi1867:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1588:
+; NoVLX-NEXT: .Lcfi1868:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1589:
+; NoVLX-NEXT: .Lcfi1869:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1590:
+; NoVLX-NEXT: .Lcfi1870:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1591:
+; NoVLX-NEXT: .Lcfi1871:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1592:
+; NoVLX-NEXT: .Lcfi1872:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1593:
+; NoVLX-NEXT: .Lcfi1873:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1594:
+; NoVLX-NEXT: .Lcfi1874:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1595:
+; NoVLX-NEXT: .Lcfi1875:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1596:
+; NoVLX-NEXT: .Lcfi1876:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1597:
+; NoVLX-NEXT: .Lcfi1877:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1598:
+; NoVLX-NEXT: .Lcfi1878:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1599:
+; NoVLX-NEXT: .Lcfi1879:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1600:
+; NoVLX-NEXT: .Lcfi1880:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1601:
+; NoVLX-NEXT: .Lcfi1881:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1602:
+; NoVLX-NEXT: .Lcfi1882:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1883:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1884:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1885:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1886:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1887:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1603:
+; NoVLX-NEXT: .Lcfi1888:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1604:
+; NoVLX-NEXT: .Lcfi1889:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1605:
+; NoVLX-NEXT: .Lcfi1890:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1891:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1892:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1893:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1894:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1895:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1606:
+; NoVLX-NEXT: .Lcfi1896:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1607:
+; NoVLX-NEXT: .Lcfi1897:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1608:
+; NoVLX-NEXT: .Lcfi1898:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1899:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1900:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1901:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1902:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1903:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1609:
+; NoVLX-NEXT: .Lcfi1904:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1610:
+; NoVLX-NEXT: .Lcfi1905:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1611:
+; NoVLX-NEXT: .Lcfi1906:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1907:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1908:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1909:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1910:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1911:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1612:
+; NoVLX-NEXT: .Lcfi1912:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1613:
+; NoVLX-NEXT: .Lcfi1913:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1614:
+; NoVLX-NEXT: .Lcfi1914:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1915:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1916:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1917:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1918:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1919:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1615:
+; NoVLX-NEXT: .Lcfi1920:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1616:
+; NoVLX-NEXT: .Lcfi1921:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1617:
+; NoVLX-NEXT: .Lcfi1922:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1923:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1924:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1925:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1926:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1927:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
-; VLX: # BB#0: # %entry
-; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
-; VLX-NEXT: kmovw %k0, %eax
-; VLX-NEXT: vzeroupper
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
-; NoVLX: # BB#0: # %entry
-; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: retq
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1618:
+; NoVLX-NEXT: .Lcfi1928:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1619:
+; NoVLX-NEXT: .Lcfi1929:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1620:
+; NoVLX-NEXT: .Lcfi1930:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1621:
+; NoVLX-NEXT: .Lcfi1931:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1622:
+; NoVLX-NEXT: .Lcfi1932:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1623:
+; NoVLX-NEXT: .Lcfi1933:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1624:
+; NoVLX-NEXT: .Lcfi1934:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1625:
+; NoVLX-NEXT: .Lcfi1935:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1626:
+; NoVLX-NEXT: .Lcfi1936:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1627:
+; NoVLX-NEXT: .Lcfi1937:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1628:
+; NoVLX-NEXT: .Lcfi1938:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1629:
+; NoVLX-NEXT: .Lcfi1939:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1630:
+; NoVLX-NEXT: .Lcfi1940:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1631:
+; NoVLX-NEXT: .Lcfi1941:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1632:
+; NoVLX-NEXT: .Lcfi1942:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1633:
+; NoVLX-NEXT: .Lcfi1943:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1634:
+; NoVLX-NEXT: .Lcfi1944:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1635:
+; NoVLX-NEXT: .Lcfi1945:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1636:
+; NoVLX-NEXT: .Lcfi1946:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1637:
+; NoVLX-NEXT: .Lcfi1947:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1638:
+; NoVLX-NEXT: .Lcfi1948:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1639:
+; NoVLX-NEXT: .Lcfi1949:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1640:
+; NoVLX-NEXT: .Lcfi1950:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1641:
+; NoVLX-NEXT: .Lcfi1951:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1642:
+; NoVLX-NEXT: .Lcfi1952:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1643:
+; NoVLX-NEXT: .Lcfi1953:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1644:
+; NoVLX-NEXT: .Lcfi1954:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1645:
+; NoVLX-NEXT: .Lcfi1955:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1646:
+; NoVLX-NEXT: .Lcfi1956:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1647:
+; NoVLX-NEXT: .Lcfi1957:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1648:
+; NoVLX-NEXT: .Lcfi1958:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1649:
+; NoVLX-NEXT: .Lcfi1959:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1650:
+; NoVLX-NEXT: .Lcfi1960:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1651:
+; NoVLX-NEXT: .Lcfi1961:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1652:
+; NoVLX-NEXT: .Lcfi1962:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1653:
+; NoVLX-NEXT: .Lcfi1963:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1654:
+; NoVLX-NEXT: .Lcfi1964:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1655:
+; NoVLX-NEXT: .Lcfi1965:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1656:
+; NoVLX-NEXT: .Lcfi1966:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1657:
+; NoVLX-NEXT: .Lcfi1967:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x float>
+ %2 = fcmp oeq <16 x float> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1968:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1969:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1970:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1971:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1972:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1973:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1974:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1975:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <8 x i64> %__a to <16 x float>
- %load = load <8 x i64>, <8 x i64>* %__b
- %1 = bitcast <8 x i64> %load to <16 x float>
- %2 = fcmp oeq <16 x float> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
- %6 = bitcast <64 x i1> %5 to i64
- ret i64 %6
-}
-
-define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
-; VLX: # BB#0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: vzeroupper
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
-; NoVLX: # BB#0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1658:
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1659:
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1660:
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1661:
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1662:
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1663:
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1664:
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1665:
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1666:
+; NoVLX-NEXT: .Lcfi1976:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1667:
+; NoVLX-NEXT: .Lcfi1977:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1668:
+; NoVLX-NEXT: .Lcfi1978:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1669:
+; NoVLX-NEXT: .Lcfi1979:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1670:
+; NoVLX-NEXT: .Lcfi1980:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1671:
+; NoVLX-NEXT: .Lcfi1981:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1672:
+; NoVLX-NEXT: .Lcfi1982:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1673:
+; NoVLX-NEXT: .Lcfi1983:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1674:
+; NoVLX-NEXT: .Lcfi1984:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1675:
+; NoVLX-NEXT: .Lcfi1985:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1676:
+; NoVLX-NEXT: .Lcfi1986:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1677:
+; NoVLX-NEXT: .Lcfi1987:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1678:
+; NoVLX-NEXT: .Lcfi1988:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1679:
+; NoVLX-NEXT: .Lcfi1989:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1680:
+; NoVLX-NEXT: .Lcfi1990:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1681:
+; NoVLX-NEXT: .Lcfi1991:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1682:
+; NoVLX-NEXT: .Lcfi1992:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1683:
+; NoVLX-NEXT: .Lcfi1993:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1684:
+; NoVLX-NEXT: .Lcfi1994:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1685:
+; NoVLX-NEXT: .Lcfi1995:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1686:
+; NoVLX-NEXT: .Lcfi1996:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1687:
+; NoVLX-NEXT: .Lcfi1997:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1688:
+; NoVLX-NEXT: .Lcfi1998:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1689:
+; NoVLX-NEXT: .Lcfi1999:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1690:
+; NoVLX-NEXT: .Lcfi2000:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1691:
+; NoVLX-NEXT: .Lcfi2001:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1692:
+; NoVLX-NEXT: .Lcfi2002:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1693:
+; NoVLX-NEXT: .Lcfi2003:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1694:
+; NoVLX-NEXT: .Lcfi2004:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1695:
+; NoVLX-NEXT: .Lcfi2005:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1696:
+; NoVLX-NEXT: .Lcfi2006:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1697:
+; NoVLX-NEXT: .Lcfi2007:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1698:
+; NoVLX-NEXT: .Lcfi2008:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1699:
+; NoVLX-NEXT: .Lcfi2009:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1700:
+; NoVLX-NEXT: .Lcfi2010:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1701:
+; NoVLX-NEXT: .Lcfi2011:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1702:
+; NoVLX-NEXT: .Lcfi2012:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1703:
+; NoVLX-NEXT: .Lcfi2013:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1704:
+; NoVLX-NEXT: .Lcfi2014:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1705:
+; NoVLX-NEXT: .Lcfi2015:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1706:
+; NoVLX-NEXT: .Lcfi2016:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1707:
+; NoVLX-NEXT: .Lcfi2017:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1708:
+; NoVLX-NEXT: .Lcfi2018:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1709:
+; NoVLX-NEXT: .Lcfi2019:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1710:
+; NoVLX-NEXT: .Lcfi2020:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1711:
+; NoVLX-NEXT: .Lcfi2021:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1712:
+; NoVLX-NEXT: .Lcfi2022:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1713:
+; NoVLX-NEXT: .Lcfi2023:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1714:
+; NoVLX-NEXT: .Lcfi2024:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1715:
+; NoVLX-NEXT: .Lcfi2025:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1716:
+; NoVLX-NEXT: .Lcfi2026:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1717:
+; NoVLX-NEXT: .Lcfi2027:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1718:
+; NoVLX-NEXT: .Lcfi2028:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1719:
+; NoVLX-NEXT: .Lcfi2029:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1720:
+; NoVLX-NEXT: .Lcfi2030:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1721:
+; NoVLX-NEXT: .Lcfi2031:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1722:
+; NoVLX-NEXT: .Lcfi2032:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1723:
+; NoVLX-NEXT: .Lcfi2033:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1724:
+; NoVLX-NEXT: .Lcfi2034:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1725:
+; NoVLX-NEXT: .Lcfi2035:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1726:
+; NoVLX-NEXT: .Lcfi2036:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1727:
+; NoVLX-NEXT: .Lcfi2037:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1728:
+; NoVLX-NEXT: .Lcfi2038:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1729:
+; NoVLX-NEXT: .Lcfi2039:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1730:
+; NoVLX-NEXT: .Lcfi2040:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1731:
+; NoVLX-NEXT: .Lcfi2041:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1732:
+; NoVLX-NEXT: .Lcfi2042:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1733:
+; NoVLX-NEXT: .Lcfi2043:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1734:
+; NoVLX-NEXT: .Lcfi2044:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1735:
+; NoVLX-NEXT: .Lcfi2045:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1736:
+; NoVLX-NEXT: .Lcfi2046:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1737:
+; NoVLX-NEXT: .Lcfi2047:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1738:
+; NoVLX-NEXT: .Lcfi2048:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1739:
+; NoVLX-NEXT: .Lcfi2049:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1740:
+; NoVLX-NEXT: .Lcfi2050:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1741:
+; NoVLX-NEXT: .Lcfi2051:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1742:
+; NoVLX-NEXT: .Lcfi2052:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1743:
+; NoVLX-NEXT: .Lcfi2053:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1744:
+; NoVLX-NEXT: .Lcfi2054:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1745:
+; NoVLX-NEXT: .Lcfi2055:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1746:
+; NoVLX-NEXT: .Lcfi2056:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1747:
+; NoVLX-NEXT: .Lcfi2057:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1748:
+; NoVLX-NEXT: .Lcfi2058:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1749:
+; NoVLX-NEXT: .Lcfi2059:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1750:
+; NoVLX-NEXT: .Lcfi2060:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1751:
+; NoVLX-NEXT: .Lcfi2061:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1752:
+; NoVLX-NEXT: .Lcfi2062:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1753:
+; NoVLX-NEXT: .Lcfi2063:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1754:
+; NoVLX-NEXT: .Lcfi2064:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1755:
+; NoVLX-NEXT: .Lcfi2065:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1756:
+; NoVLX-NEXT: .Lcfi2066:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1757:
+; NoVLX-NEXT: .Lcfi2067:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1758:
+; NoVLX-NEXT: .Lcfi2068:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1759:
+; NoVLX-NEXT: .Lcfi2069:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1760:
+; NoVLX-NEXT: .Lcfi2070:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1761:
+; NoVLX-NEXT: .Lcfi2071:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1762:
+; NoVLX-NEXT: .Lcfi2072:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1763:
+; NoVLX-NEXT: .Lcfi2073:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1764:
+; NoVLX-NEXT: .Lcfi2074:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1765:
+; NoVLX-NEXT: .Lcfi2075:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1766:
+; NoVLX-NEXT: .Lcfi2076:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1767:
+; NoVLX-NEXT: .Lcfi2077:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1768:
+; NoVLX-NEXT: .Lcfi2078:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1769:
+; NoVLX-NEXT: .Lcfi2079:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1770:
+; NoVLX-NEXT: .Lcfi2080:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1771:
+; NoVLX-NEXT: .Lcfi2081:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1772:
+; NoVLX-NEXT: .Lcfi2082:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1773:
+; NoVLX-NEXT: .Lcfi2083:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>