From 527dfff180d1d39da0e941eddea2a04b2cc409ff Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Jan 2019 07:12:54 +0000 Subject: [PATCH] [X86] Add test cases for opportunities to use KTEST when check if the result of ANDing two mask registers is zero. The test cases are constructed to avoid folding the AND into a masked compare operation. Currently we emit a KAND and a KORTEST for these cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350287 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx512-mask-op.ll | 726 +++++++++++++++++++++++++++++ 1 file changed, 726 insertions(+) diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index 5996de5455d..25173709bd7 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -3479,3 +3479,729 @@ define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) { ret void } declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>) + +define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { +; KNL-LABEL: ktest_3: +; KNL: ## %bb.0: +; KNL-NEXT: pushq %rax +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 +; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 +; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 +; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 +; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2 +; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3 +; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: korw %k3, %k2, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: testb %al, %al +; KNL-NEXT: je LBB71_1 +; KNL-NEXT: ## %bb.2: ## %exit +; KNL-NEXT: popq %rax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; KNL-NEXT: LBB71_1: ## %bar +; KNL-NEXT: vzeroupper +; KNL-NEXT: callq _foo +; KNL-NEXT: popq %rax +; KNL-NEXT: retq +; +; SKX-LABEL: ktest_3: +; SKX: ## %bb.0: +; SKX-NEXT: pushq %rax +; SKX-NEXT: .cfi_def_cfa_offset 16 +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0 +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kortestb %k0, %k0 +; SKX-NEXT: je LBB71_1 +; SKX-NEXT: ## %bb.2: ## %exit +; SKX-NEXT: popq %rax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: LBB71_1: ## %bar +; SKX-NEXT: vzeroupper +; SKX-NEXT: callq _foo +; SKX-NEXT: popq %rax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: ktest_3: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 +; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 +; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2 +; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3 +; AVX512BW-NEXT: korw %k1, %k0, %k0 +; AVX512BW-NEXT: korw %k3, %k2, %k1 +; AVX512BW-NEXT: kandw %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: testb %al, %al +; AVX512BW-NEXT: je LBB71_1 +; AVX512BW-NEXT: ## %bb.2: ## %exit +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; AVX512BW-NEXT: LBB71_1: ## %bar +; AVX512BW-NEXT: vzeroupper 
+; AVX512BW-NEXT: callq _foo +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: ktest_3: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 +; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 +; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2 +; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3 +; AVX512DQ-NEXT: korb %k1, %k0, %k0 +; AVX512DQ-NEXT: korb %k3, %k2, %k1 +; AVX512DQ-NEXT: kandb %k1, %k0, %k0 +; AVX512DQ-NEXT: kortestb %k0, %k0 +; AVX512DQ-NEXT: je LBB71_1 +; AVX512DQ-NEXT: ## %bb.2: ## %exit +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; AVX512DQ-NEXT: LBB71_1: ## %bar +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq _foo +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: retq +; +; X86-LABEL: ktest_3: +; X86: ## %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0 +; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; X86-NEXT: korb %k1, %k0, %k0 +; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2 +; X86-NEXT: korb %k2, %k1, %k1 +; X86-NEXT: kandb %k1, %k0, %k0 +; X86-NEXT: kortestb %k0, %k0 +; X86-NEXT: je LBB71_1 +; X86-NEXT: ## %bb.2: ## %exit +; X86-NEXT: addl $12, %esp +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; X86-NEXT: LBB71_1: ## %bar +; X86-NEXT: vzeroupper +; X86-NEXT: calll _foo +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl + %a = icmp eq <8 x i32> %w, zeroinitializer + %b = icmp eq <8 x i32> %x, zeroinitializer + %c = icmp eq <8 x i32> %y, zeroinitializer + %d = icmp eq <8 x i32> %z, zeroinitializer + %e = or <8 x i1> %a, %b + %f = or <8 x i1> %c, %d + %g = and <8 x i1> %e, %f + %h = bitcast <8 x i1> %g to i8 + %i = icmp eq i8 %h, 0 + br i1 %i, label %bar, label %exit + +bar: + call void @foo() + br label %exit + +exit: + ret void +} + +define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) { +; KNL-LABEL: ktest_4: +; KNL: ## %bb.0: +; KNL-NEXT: pushq %rax +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2 +; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3 +; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: korw %k3, %k2, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: testb %al, %al +; KNL-NEXT: je LBB72_1 +; KNL-NEXT: ## %bb.2: ## %exit +; KNL-NEXT: popq %rax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; KNL-NEXT: LBB72_1: ## %bar +; KNL-NEXT: vzeroupper +; KNL-NEXT: callq _foo +; KNL-NEXT: popq %rax +; KNL-NEXT: retq +; +; SKX-LABEL: ktest_4: +; SKX: ## %bb.0: +; SKX-NEXT: pushq %rax +; SKX-NEXT: .cfi_def_cfa_offset 16 +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2 +; SKX-NEXT: korb %k2, %k1, %k1 +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kortestb %k0, %k0 +; SKX-NEXT: je LBB72_1 +; SKX-NEXT: ## %bb.2: ## %exit +; SKX-NEXT: popq %rax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: LBB72_1: ## %bar +; SKX-NEXT: vzeroupper +; SKX-NEXT: callq _foo +; SKX-NEXT: popq %rax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: ktest_4: +; 
AVX512BW: ## %bb.0: +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2 +; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3 +; AVX512BW-NEXT: korw %k1, %k0, %k0 +; AVX512BW-NEXT: korw %k3, %k2, %k1 +; AVX512BW-NEXT: kandw %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: testb %al, %al +; AVX512BW-NEXT: je LBB72_1 +; AVX512BW-NEXT: ## %bb.2: ## %exit +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; AVX512BW-NEXT: LBB72_1: ## %bar +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: callq _foo +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: ktest_4: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; AVX512DQ-NEXT: korb %k1, %k0, %k0 +; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2 +; AVX512DQ-NEXT: korb %k2, %k1, %k1 +; AVX512DQ-NEXT: kandb %k1, %k0, %k0 +; AVX512DQ-NEXT: kortestb %k0, %k0 +; AVX512DQ-NEXT: je LBB72_1 +; AVX512DQ-NEXT: ## %bb.2: ## %exit +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; AVX512DQ-NEXT: LBB72_1: ## %bar +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq _foo +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: retq +; +; X86-LABEL: ktest_4: +; X86: ## %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; X86-NEXT: korb %k1, %k0, %k0 +; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2 +; X86-NEXT: korb %k2, %k1, %k1 +; X86-NEXT: kandb %k1, %k0, %k0 +; X86-NEXT: kortestb %k0, %k0 +; X86-NEXT: je LBB72_1 +; X86-NEXT: ## %bb.2: ## %exit +; X86-NEXT: addl $12, %esp +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; X86-NEXT: LBB72_1: ## %bar +; X86-NEXT: vzeroupper +; X86-NEXT: calll _foo +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl + %a = icmp eq <8 x i64> %w, zeroinitializer + %b = icmp eq <8 x i64> %x, zeroinitializer + %c = icmp eq <8 x i64> %y, zeroinitializer + %d = icmp eq <8 x i64> %z, zeroinitializer + %e = or <8 x i1> %a, %b + %f = or <8 x i1> %c, %d + %g = and <8 x i1> %e, %f + %h = bitcast <8 x i1> %g to i8 + %i = icmp eq i8 %h, 0 + br i1 %i, label %bar, label %exit + +bar: + call void @foo() + br label %exit + +exit: + ret void +} + +define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) { +; CHECK-LABEL: ktest_5: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; CHECK-NEXT: korw %k1, %k0, %k0 +; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k2 +; CHECK-NEXT: korw %k2, %k1, %k1 +; CHECK-NEXT: kandw %k1, %k0, %k0 +; CHECK-NEXT: kortestw %k0, %k0 +; CHECK-NEXT: je LBB73_1 +; CHECK-NEXT: ## %bb.2: ## %exit +; CHECK-NEXT: popq %rax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +; CHECK-NEXT: LBB73_1: ## %bar +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq _foo +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +; +; X86-LABEL: ktest_5: +; X86: ## %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; X86-NEXT: korw %k1, %k0, %k0 +; 
X86-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2 +; X86-NEXT: korw %k2, %k1, %k1 +; X86-NEXT: kandw %k1, %k0, %k0 +; X86-NEXT: kortestw %k0, %k0 +; X86-NEXT: je LBB73_1 +; X86-NEXT: ## %bb.2: ## %exit +; X86-NEXT: addl $12, %esp +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; X86-NEXT: LBB73_1: ## %bar +; X86-NEXT: vzeroupper +; X86-NEXT: calll _foo +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl + %a = icmp eq <16 x i32> %w, zeroinitializer + %b = icmp eq <16 x i32> %x, zeroinitializer + %c = icmp eq <16 x i32> %y, zeroinitializer + %d = icmp eq <16 x i32> %z, zeroinitializer + %e = or <16 x i1> %a, %b + %f = or <16 x i1> %c, %d + %g = and <16 x i1> %e, %f + %h = bitcast <16 x i1> %g to i16 + %i = icmp eq i16 %h, 0 + br i1 %i, label %bar, label %exit + +bar: + call void @foo() + br label %exit + +exit: + ret void +} + +define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) { +; KNL-LABEL: ktest_6: +; KNL: ## %bb.0: +; KNL-NEXT: pushq %rax +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2 +; KNL-NEXT: vpor %ymm2, %ymm0, %ymm0 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2 +; KNL-NEXT: vpor %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4 +; KNL-NEXT: vpor %ymm4, %ymm2, %ymm2 +; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0 +; KNL-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2 +; KNL-NEXT: vpor %ymm2, %ymm3, %ymm2 +; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpmovsxwd %ymm1, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: je LBB74_1 +; KNL-NEXT: ## %bb.2: ## %exit +; KNL-NEXT: popq %rax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; KNL-NEXT: LBB74_1: ## %bar +; KNL-NEXT: vzeroupper +; KNL-NEXT: callq _foo +; KNL-NEXT: popq %rax +; KNL-NEXT: retq +; +; SKX-LABEL: ktest_6: +; SKX: ## %bb.0: +; SKX-NEXT: pushq %rax +; SKX-NEXT: .cfi_def_cfa_offset 16 +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: kord %k1, %k0, %k0 +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2 +; SKX-NEXT: kord %k2, %k1, %k1 +; SKX-NEXT: kandd %k1, %k0, %k0 +; SKX-NEXT: kortestd %k0, %k0 +; SKX-NEXT: je LBB74_1 +; SKX-NEXT: ## %bb.2: ## %exit +; SKX-NEXT: popq %rax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: LBB74_1: ## %bar +; SKX-NEXT: vzeroupper +; SKX-NEXT: callq _foo +; SKX-NEXT: popq %rax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: ktest_6: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; AVX512BW-NEXT: kord %k1, %k0, %k0 +; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2 +; AVX512BW-NEXT: kord %k2, %k1, %k1 +; AVX512BW-NEXT: kandd %k1, %k0, %k0 +; AVX512BW-NEXT: kortestd %k0, %k0 +; AVX512BW-NEXT: je LBB74_1 +; AVX512BW-NEXT: ## %bb.2: ## %exit +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; AVX512BW-NEXT: LBB74_1: ## %bar +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: callq _foo +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: retq +; +; 
AVX512DQ-LABEL: ktest_6: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2 +; AVX512DQ-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4 +; AVX512DQ-NEXT: vpor %ymm4, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2 +; AVX512DQ-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %ecx +; AVX512DQ-NEXT: shll $16, %ecx +; AVX512DQ-NEXT: orl %eax, %ecx +; AVX512DQ-NEXT: je LBB74_1 +; AVX512DQ-NEXT: ## %bb.2: ## %exit +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; AVX512DQ-NEXT: LBB74_1: ## %bar +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq _foo +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: retq +; +; X86-LABEL: ktest_6: +; X86: ## %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; X86-NEXT: kord %k1, %k0, %k0 +; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2 +; X86-NEXT: kord %k2, %k1, %k1 +; X86-NEXT: kandd %k1, %k0, %k0 +; X86-NEXT: kortestd %k0, %k0 +; X86-NEXT: je LBB74_1 +; X86-NEXT: ## %bb.2: ## %exit +; X86-NEXT: addl $12, %esp +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; X86-NEXT: LBB74_1: ## %bar +; X86-NEXT: vzeroupper +; X86-NEXT: calll _foo +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl + %a = icmp eq <32 x i16> %w, zeroinitializer + %b = icmp eq <32 x i16> %x, zeroinitializer + %c = icmp eq <32 x i16> %y, zeroinitializer + %d = icmp eq <32 x i16> %z, zeroinitializer + %e = or <32 x i1> %a, %b + %f = or <32 x i1> %c, %d + %g = and <32 x i1> %e, %f + %h = bitcast <32 x i1> %g to i32 + %i = icmp eq i32 %h, 0 + br i1 %i, label %bar, label %exit + +bar: + call void @foo() + br label %exit + +exit: + ret void +} + +define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) { +; KNL-LABEL: ktest_7: +; KNL: ## %bb.0: +; KNL-NEXT: pushq %rax +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9 +; KNL-NEXT: vextracti128 $1, %ymm9, %xmm0 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10 +; KNL-NEXT: vextracti128 $1, %ymm10, %xmm1 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11 +; KNL-NEXT: vextracti128 $1, %ymm11, %xmm2 +; KNL-NEXT: vpor %xmm2, %xmm0, %xmm13 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2 +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 +; KNL-NEXT: vpor %xmm3, %xmm1, %xmm12 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3 +; KNL-NEXT: vextracti128 $1, %ymm3, %xmm4 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5 +; KNL-NEXT: vextracti128 $1, %ymm5, %xmm1 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6 +; KNL-NEXT: vextracti128 $1, %ymm6, %xmm0 +; KNL-NEXT: vpor %xmm0, %xmm4, %xmm0 +; KNL-NEXT: vpand %xmm0, %xmm13, %xmm0 +; KNL-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4 +; KNL-NEXT: vextracti128 $1, %ymm4, %xmm7 +; KNL-NEXT: vpor %xmm7, %xmm1, 
%xmm1 +; KNL-NEXT: vpand %xmm1, %xmm12, %xmm1 +; KNL-NEXT: vpor %xmm2, %xmm10, %xmm2 +; KNL-NEXT: vpor %xmm11, %xmm9, %xmm7 +; KNL-NEXT: vpor %xmm4, %xmm5, %xmm4 +; KNL-NEXT: vpand %xmm4, %xmm2, %xmm2 +; KNL-NEXT: vpor %xmm6, %xmm3, %xmm3 +; KNL-NEXT: vpand %xmm3, %xmm7, %xmm3 +; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 +; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovsxbd %xmm2, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: shll $16, %edx +; KNL-NEXT: orl %eax, %edx +; KNL-NEXT: shlq $32, %rdx +; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: je LBB75_1 +; KNL-NEXT: ## %bb.2: ## %exit +; KNL-NEXT: popq %rax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; KNL-NEXT: LBB75_1: ## %bar +; KNL-NEXT: vzeroupper +; KNL-NEXT: callq _foo +; KNL-NEXT: popq %rax +; KNL-NEXT: retq +; +; SKX-LABEL: ktest_7: +; SKX: ## %bb.0: +; SKX-NEXT: pushq %rax +; SKX-NEXT: .cfi_def_cfa_offset 16 +; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: korq %k1, %k0, %k0 +; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2 +; SKX-NEXT: korq %k2, %k1, %k1 +; SKX-NEXT: kandq %k1, %k0, %k0 +; SKX-NEXT: kortestq %k0, %k0 +; SKX-NEXT: je LBB75_1 +; SKX-NEXT: ## %bb.2: ## %exit +; SKX-NEXT: popq %rax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: LBB75_1: ## %bar +; SKX-NEXT: vzeroupper +; SKX-NEXT: callq _foo +; SKX-NEXT: popq %rax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: ktest_7: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; AVX512BW-NEXT: korq %k1, %k0, %k0 +; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2 +; AVX512BW-NEXT: korq %k2, %k1, %k1 +; AVX512BW-NEXT: kandq %k1, %k0, %k0 +; AVX512BW-NEXT: kortestq %k0, %k0 +; AVX512BW-NEXT: je LBB75_1 +; AVX512BW-NEXT: ## %bb.2: ## %exit +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; AVX512BW-NEXT: LBB75_1: ## %bar +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: callq _foo +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: ktest_7: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9 +; AVX512DQ-NEXT: vextracti128 $1, %ymm9, %xmm0 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10 +; AVX512DQ-NEXT: vextracti128 $1, %ymm10, %xmm1 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11 +; AVX512DQ-NEXT: vextracti128 $1, %ymm11, %xmm2 +; AVX512DQ-NEXT: vpor %xmm2, %xmm0, %xmm13 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2 +; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm12 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3 +; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5 +; AVX512DQ-NEXT: vextracti128 $1, %ymm5, %xmm1 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6 +; AVX512DQ-NEXT: vextracti128 $1, %ymm6, %xmm0 +; AVX512DQ-NEXT: vpor %xmm0, %xmm4, %xmm0 +; AVX512DQ-NEXT: vpand %xmm0, %xmm13, %xmm0 +; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4 +; 
AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm7 +; AVX512DQ-NEXT: vpor %xmm7, %xmm1, %xmm1 +; AVX512DQ-NEXT: vpand %xmm1, %xmm12, %xmm1 +; AVX512DQ-NEXT: vpor %xmm2, %xmm10, %xmm2 +; AVX512DQ-NEXT: vpor %xmm11, %xmm9, %xmm7 +; AVX512DQ-NEXT: vpor %xmm4, %xmm5, %xmm4 +; AVX512DQ-NEXT: vpand %xmm4, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpor %xmm6, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpand %xmm3, %xmm7, %xmm3 +; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512DQ-NEXT: vpmovd2m %zmm3, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %ecx +; AVX512DQ-NEXT: shll $16, %ecx +; AVX512DQ-NEXT: orl %eax, %ecx +; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %edx +; AVX512DQ-NEXT: shll $16, %edx +; AVX512DQ-NEXT: orl %eax, %edx +; AVX512DQ-NEXT: shlq $32, %rdx +; AVX512DQ-NEXT: orq %rcx, %rdx +; AVX512DQ-NEXT: je LBB75_1 +; AVX512DQ-NEXT: ## %bb.2: ## %exit +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; AVX512DQ-NEXT: LBB75_1: ## %bar +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq _foo +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: retq +; +; X86-LABEL: ktest_7: +; X86: ## %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0 +; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; X86-NEXT: korq %k1, %k0, %k0 +; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2 +; X86-NEXT: korq %k2, %k1, %k1 +; X86-NEXT: kandq %k1, %k0, %k0 +; X86-NEXT: kshiftrq $32, %k0, %k1 +; X86-NEXT: kortestd %k1, %k0 +; X86-NEXT: je LBB75_1 +; X86-NEXT: ## %bb.2: ## %exit +; X86-NEXT: addl $12, %esp +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; X86-NEXT: LBB75_1: ## %bar +; X86-NEXT: vzeroupper +; X86-NEXT: calll _foo +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl + %a = icmp eq <64 x i8> %w, zeroinitializer + %b = icmp eq <64 x i8> %x, zeroinitializer + %c = icmp eq <64 x i8> %y, zeroinitializer + %d = icmp eq <64 x i8> %z, zeroinitializer + %e = or <64 x i1> %a, %b + %f = or <64 x i1> %c, %d + %g = and <64 x i1> %e, %f + %h = bitcast <64 x i1> %g to i64 + %i = icmp eq i64 %h, 0 + br i1 %i, label %bar, label %exit + +bar: + call void @foo() + br label %exit + +exit: + ret void +} -- 2.50.1
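
For context on the opportunity the commit message describes: in the SKX/AVX512DQ checks above, the mask AND followed by the zero test lowers to a KAND plus a KORTEST. A single KTEST sets ZF directly from the AND of two mask registers (the byte form requires AVX512DQ), so the pair could in principle collapse to one instruction. The sequence below is a hedged illustrative sketch of that before/after shape, not output produced by this patch; the label name is reused from the checks above purely for illustration.

    # current lowering (as checked above)
    kandb    %k1, %k0, %k0
    kortestb %k0, %k0
    je       LBB71_1
    # possible lowering if the AND is folded into the test
    ktestb   %k1, %k0
    je       LBB71_1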