From 50d76a80ed48a91480bc8d95906cac5a5eda90c5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 16 Jan 2017 17:26:23 +0000
Subject: [PATCH] [InstCombine][SSE] Tests showing missed opportunities to pass demanded elts through a packss/packus truncation

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292144 91177308-0d34-0410-b5e6-96231b3b80d8
---
NOTE(review): this copy of the patch appears to have lost every `<...>` span
that begins with a letter: the author's e-mail address on the From: line, the
mask operand of each shufflevector (only the one `zeroinitializer` mask
survives), and both constant operands of the two packuswb calls. The IR below
will presumably not parse as-is; restore those operands from the upstream
revision named in git-svn-id before applying. Nothing has been reconstructed
here; apart from this note only the line breaks were restored, and the hunk
still carries exactly 129 added lines.

 test/Transforms/InstCombine/x86-pack.ll | 129 ++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 test/Transforms/InstCombine/x86-pack.ll

diff --git a/test/Transforms/InstCombine/x86-pack.ll b/test/Transforms/InstCombine/x86-pack.ll
new file mode 100644
index 00000000000..0515dfea6b1
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pack.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; Demanded Elts
+;
+
+define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> undef, <8 x i32>
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32>
+  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32>
+  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32>
+  ret <8 x i16> %4
+}
+
+define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_128(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> %a0, i32 0, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> %a1, i32 0, i32 3
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+  %1 = insertelement <4 x i32> %a0, i32 0, i32 0
+  %2 = insertelement <4 x i32> %a1, i32 0, i32 3
+  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32>
+  ret <8 x i16> %4
+}
+
+define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> %a0, i16 0, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> %a1, i16 0, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+  %1 = insertelement <8 x i16> %a0, i16 0, i32 0
+  %2 = insertelement <8 x i16> %a1, i16 0, i32 0
+  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
+  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32>
+  ret <16 x i8> %4
+}
+
+define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> )
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+  %1 = insertelement <8 x i16> undef, i16 0, i32 0
+  %2 = insertelement <8 x i16> undef, i16 0, i32 0
+  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
+  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32>
+  ret <16 x i8> %4
+}
+
+define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32>
+  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32>
+  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
+  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32>
+  ret <16 x i16> %4
+}
+
+define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32>
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32>
+  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32>
+  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
+  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32>
+  ret <16 x i16> %4
+}
+
+define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_256(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> %a0, i16 0, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i16> %a1, i16 0, i32 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP3]], <32 x i8> undef, <32 x i32>
+; CHECK-NEXT: ret <32 x i8> [[TMP4]]
+;
+  %1 = insertelement <16 x i16> %a0, i16 0, i32 0
+  %2 = insertelement <16 x i16> %a1, i16 0, i32 8
+  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
+  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32>
+  ret <32 x i8> %4
+}
+
+define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_256(
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> , <16 x i16> )
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT: ret <32 x i8> [[TMP2]]
+;
+  %1 = insertelement <16 x i16> undef, i16 0, i32 1
+  %2 = insertelement <16 x i16> undef, i16 0, i32 0
+  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
+  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
+  ret <32 x i8> %4
+}
+
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
-- 
2.40.0