From: Craig Topper
Date: Fri, 11 Oct 2019 07:24:36 +0000 (+0000)
Subject: [X86] Add v8i64->v8i8 ssat/usat/packus truncate tests to min-legal-vector-width.ll
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a621fe08538e8792eeb89418f6bd1c5f306a4030;p=llvm

[X86] Add v8i64->v8i8 ssat/usat/packus truncate tests to min-legal-vector-width.ll

I wonder if we should split the v8i8 stores in order to form two v4i8
saturating truncating stores. This would remove the unpckl needed to
concatenate the v4i8 results to make a single store.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374519 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/min-legal-vector-width.ll b/test/CodeGen/X86/min-legal-vector-width.ll
index eb90a2ae634..49dae99268d 100644
--- a/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1142,3 +1142,106 @@ define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32>* %p) "min-legal-vector-wi
   ret <32 x i8> %f
 }
 
+define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_packus_v8i64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = icmp sgt <8 x i64> %2, zeroinitializer
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  ret <8 x i8> %5
+}
+
+define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_packus_v8i64_v8i8_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vmovq %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = icmp sgt <8 x i64> %2, zeroinitializer
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  store <8 x i8> %5, <8 x i8> *%p1
+  ret void
+}
+
+define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_ssat_v8i64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovsqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  ret <8 x i8> %5
+}
+
+define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_ssat_v8i64_v8i8_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovsqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vmovq %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  store <8 x i8> %5, <8 x i8> *%p1
+  ret void
+}
+
+define <8 x i8> @trunc_usat_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_usat_v8i64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp ult <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = trunc <8 x i64> %2 to <8 x i8>
+  ret <8 x i8> %3
+}
+
+define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_usat_v8i64_v8i8_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vmovq %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp ult <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = trunc <8 x i64> %2 to <8 x i8>
+  store <8 x i8> %3, <8 x i8> *%p1
+  ret void
+}
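
For reference, a hypothetical split lowering for the _store cases (a sketch of
the idea in the commit message above, not output the compiler currently
produces) could use the 4-byte memory-destination form of vpmovusqb and drop
the vpunpckldq/vmovq pair entirely, e.g. for trunc_packus_v8i64_v8i8_store:

    vpxor     %xmm2, %xmm2, %xmm2
    vpmaxsq   %ymm2, %ymm0, %ymm0    # clamp qwords 0-3 to >= 0 before unsigned saturate
    vpmovusqb %ymm0, (%rdi)          # unsigned-saturate to bytes, 4-byte store of low half
    vpmaxsq   %ymm2, %ymm1, %ymm1    # clamp qwords 4-7 to >= 0
    vpmovusqb %ymm1, 4(%rdi)         # unsigned-saturate to bytes, 4-byte store of high half
    vzeroupper
    retq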