From 96d65a29e3d51c9da7fab69acb330f74df9b7110 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 3 Apr 2019 00:05:03 +0000 Subject: [PATCH] [X86] Update the test case for v4i1 bitselect in combine-bitselect.ll to not have an infinite loop in IR. In fact we don't even need a loop at all. I backed out the bug fix this was testing for and verified that this new case hit the same issue. This should stop D59626 from deleting some of this code by realizing it was dead due to the loop. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357544 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/combine-bitselect.ll | 110 +++++++++++--------------- 1 file changed, 48 insertions(+), 62 deletions(-) diff --git a/test/CodeGen/X86/combine-bitselect.ll b/test/CodeGen/X86/combine-bitselect.ll index 973f5c2e31a..9a911ddffef 100644 --- a/test/CodeGen/X86/combine-bitselect.ll +++ b/test/CodeGen/X86/combine-bitselect.ll @@ -593,83 +593,69 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n } ; Check that mask registers don't get canonicalized. 
-define void @bitselect_v4i1_loop(<4 x i32> %a0) { +define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: bitselect_v4i1_loop: ; SSE: # %bb.0: # %bb -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE-NEXT: pxor %xmm1, %xmm2 -; SSE-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 -; SSE-NEXT: movdqa %xmm1, %xmm3 -; SSE-NEXT: pandn %xmm0, %xmm3 -; SSE-NEXT: .p2align 4, 0x90 -; SSE-NEXT: .LBB12_1: # %bb1 -; SSE-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE-NEXT: pand %xmm1, %xmm2 -; SSE-NEXT: por %xmm3, %xmm2 -; SSE-NEXT: jmp .LBB12_1 +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] +; SSE-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE-NEXT: pcmpeqd {{.*}}(%rip), %xmm1 +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pandn %xmm0, %xmm2 +; SSE-NEXT: por %xmm1, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: retq ; ; XOP-LABEL: bitselect_v4i1_loop: ; XOP: # %bb.0: # %bb -; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; XOP-NEXT: vpcomneqd %xmm1, %xmm0, %xmm1 -; XOP-NEXT: vpcomeqd {{.*}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: vmovdqa %xmm1, %xmm2 -; XOP-NEXT: .p2align 4, 0x90 -; XOP-NEXT: .LBB12_1: # %bb1 -; XOP-NEXT: # =>This Inner Loop Header: Depth=1 -; XOP-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm2 -; XOP-NEXT: jmp .LBB12_1 +; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; XOP-NEXT: vpcomneqd %xmm2, %xmm0, %xmm0 +; XOP-NEXT: vpcomeqd {{.*}}(%rip), %xmm1, %xmm2 +; XOP-NEXT: vpcomeqd {{.*}}(%rip), %xmm1, %xmm1 +; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; XOP-NEXT: retq ; ; AVX1-LABEL: bitselect_v4i1_loop: ; AVX1: # %bb.0: # %bb -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: .p2align 4, 0x90 -; AVX1-NEXT: .LBB12_1: # %bb1 -; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 -; 
AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: jmp .LBB12_1 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm1, %xmm2 +; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX1-NEXT: retq ; ; AVX2-LABEL: bitselect_v4i1_loop: ; AVX2: # %bb.0: # %bb -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [12,12,12,12] -; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 -; AVX2-NEXT: .p2align 4, 0x90 -; AVX2-NEXT: .LBB12_1: # %bb1 -; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm2 -; AVX2-NEXT: jmp .LBB12_1 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12] +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [15,15,15,15] +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: bitselect_v4i1_loop: ; AVX512F: # %bb.0: # %bb +; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] -; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k2 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2} -; AVX512F-NEXT: .p2align 4, 0x90 -; AVX512F-NEXT: .LBB12_1: # %bb1 -; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1 {%k1} -; AVX512F-NEXT: korw %k1, %k0, %k1 -; AVX512F-NEXT: jmp .LBB12_1 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12] +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k1 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15] +; AVX512F-NEXT: 
vpcmpeqd %zmm2, %zmm1, %k2 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 {%k2} +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} +; AVX512F-NEXT: korw %k0, %k1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq bb: %tmp = icmp ne <4 x i32> %a0, zeroinitializer - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp2 = phi <4 x i1> [ %tmp, %bb ], [ %tmp4, %bb1 ] - %tmp3 = icmp eq <4 x i32> %a0, <i32 12, i32 12, i32 12, i32 12> - %tmp4 = select <4 x i1> %tmp, <4 x i1> %tmp3, <4 x i1> %tmp2 - %tmp5 = and <4 x i1> %tmp4, %tmp - br label %bb1 + %tmp2 = icmp eq <4 x i32> %a1, <i32 12, i32 12, i32 12, i32 12> + %tmp3 = icmp eq <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15> + %tmp4 = select <4 x i1> %tmp, <4 x i1> %tmp2, <4 x i1> %tmp3 + ret <4 x i1> %tmp4 } + -- 2.50.1