From: Craig Topper
Date: Thu, 3 Jan 2019 22:31:07 +0000 (+0000)
Subject: [X86] Add test case for D56283.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=822a4f8574c4dfe58c643b2384dc1da8b7c78301;p=llvm

[X86] Add test case for D56283.

This tests a case where we need to be able to compute sign bits for two
insert_subvectors that are live-outs of a basic block. The result is then
used as a boolean vector in another basic block.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350359 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll
index a6d976ed7cd..02362bde81a 100644
--- a/test/CodeGen/X86/known-signbits-vector.ll
+++ b/test/CodeGen/X86/known-signbits-vector.ll
@@ -385,3 +385,69 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
   %6 = sitofp <4 x i64> %5 to <4 x float>
   ret <4 x float> %6
 }
+
+; Make sure we can preserve sign bit information into the second basic block
+; so we can avoid having to shift bit 0 into bit 7 for each element due to
+; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
+; ComputeNumSignBits handling for insert_subvector.
+define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32 x i8> %z) {
+; X32-LABEL: cross_bb_signbits_insert_subvec:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X32-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X32-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm3
+; X32-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X32-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X32-NEXT:    vpsllw $7, %xmm3, %xmm3
+; X32-NEXT:    vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X32-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X32-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm3
+; X32-NEXT:    vpsllw $7, %xmm0, %xmm0
+; X32-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; X32-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
+; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X32-NEXT:    vandnps %ymm1, %ymm0, %ymm1
+; X32-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vzeroupper
+; X32-NEXT:    retl
+;
+; X64-LABEL: cross_bb_signbits_insert_subvec:
+; X64:       # %bb.0:
+; X64-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm3
+; X64-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X64-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X64-NEXT:    vpsllw $7, %xmm3, %xmm3
+; X64-NEXT:    vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X64-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm3
+; X64-NEXT:    vpsllw $7, %xmm0, %xmm0
+; X64-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; X64-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
+; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm1
+; X64-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X64-NEXT:    vmovaps %ymm0, (%rdi)
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+  %a = icmp eq <32 x i8> %x, zeroinitializer
+  %b = icmp eq <32 x i8> %x, zeroinitializer
+  %c = and <32 x i1> %a, %b
+  br label %block
+
+block:
+  %d = select <32 x i1> %c, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %z
+  store <32 x i8> %d, <32 x i8>* %ptr, align 32
+  br label %exit
+
+exit:
+  ret void
+}
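
For context, D56283 is the patch this test exercises: it teaches
ComputeNumSignBits to look through an insert_subvector by taking the minimum
of the sign-bit counts of the base vector and the inserted subvector. The C++
below is a minimal, self-contained sketch of that rule, not the actual
SelectionDAG code; every name in it (SignBits8, VectorSignBits,
InsertSubvectorSignBits) is illustrative only.

// A toy model of known-sign-bit tracking for i8 lanes. In LLVM proper this
// logic lives in SelectionDAG::ComputeNumSignBits; the standalone form here
// only demonstrates the insert_subvector rule the test above depends on.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Number of redundant sign bits in one i8 lane (1 to 8). A lane produced by
// vpcmpeqb is all-zeros or all-ones, so it reports the maximum, 8.
static unsigned SignBits8(int8_t V) {
  unsigned N = 1;
  while (N < 8 && ((V >> (7 - N)) & 1) == ((V >> 7) & 1))
    ++N;
  return N;
}

// Known sign bits of a whole vector: the minimum over its lanes.
static unsigned VectorSignBits(const std::vector<int8_t> &Vec) {
  unsigned Min = 8;
  for (int8_t V : Vec)
    Min = std::min(Min, SignBits8(V));
  return Min;
}

// The rule itself: for insert_subvector(Base, Sub), the count that can be
// guaranteed without inspecting elements is min(Bits(Base), Bits(Sub)).
static unsigned InsertSubvectorSignBits(unsigned BaseBits, unsigned SubBits) {
  return std::min(BaseBits, SubBits);
}

int main() {
  // Two vpcmpeqb halves of the v32i8 compare: every lane is 0 or -1.
  std::vector<int8_t> Lo(16, 0), Hi(16, -1);
  unsigned LoBits = VectorSignBits(Lo); // 8
  unsigned HiBits = VectorSignBits(Hi); // 8
  // So the v32i8 mask built by vinsertf128 also has 8 known sign bits, which
  // is what lets the second block reuse it directly instead of re-expanding
  // bit 0 via the vpsllw $7 / vpand / vpcmpgtb sequence in the checks above.
  assert(InsertSubvectorSignBits(LoBits, HiBits) == 8);
  return 0;
}

Once ComputeNumSignBits can report all 8 sign bits across the basic-block
boundary, the vpsllw/vpand/vpcmpgtb re-expansion in the CHECK lines above
should disappear when D56283 lands.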