From: Simon Pilgrim
Date: Mon, 16 Jan 2017 21:34:22 +0000 (+0000)
Subject: [InstCombine][AVX] Tests showing missed opportunities to pass demanded elts through...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=663d71421ca74574c6ebea4b05cde62f0be24522;p=llvm

[InstCombine][AVX] Tests showing missed opportunities to pass demanded elts through a permilpd/permilps shuffle mask

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292165 91177308-0d34-0410-b5e6-96231b3b80d8
---

NOTE(review): this copy of the patch had all of its line breaks collapsed.
The line structure below is restored from the hunk header (6 context lines
plus 75 added lines). Leading whitespace inside each line follows the usual
LLVM test layout and is an assumption -- confirm against the target file,
since context lines must match it exactly.

NOTE(review): text that sat inside letter-initial angle brackets appears to
have been stripped during extraction: the author's e-mail address, every
constant-vector operand (for example "insertelement <4 x i32> , i32 %a1,
i32 3") and every shufflevector mask other than "zeroinitializer". Those gaps
are left exactly as found; the IR is not valid until they are restored from
upstream r292165. Do not apply this patch as-is.

diff --git a/test/Transforms/InstCombine/x86-vpermil.ll b/test/Transforms/InstCombine/x86-vpermil.ll
index fad10d7ad5c..44eeb57a89d 100644
--- a/test/Transforms/InstCombine/x86-vpermil.ll
+++ b/test/Transforms/InstCombine/x86-vpermil.ll
@@ -221,6 +221,81 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
   ret <8 x double> %a
 }
 
+; TODO: Simplify demanded elts
+
+define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> , i32 %a1, i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <4 x float> [[TMP2]]
+;
+  %1 = insertelement <4 x i32> , i32 %a1, i32 3
+  %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1)
+  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32>
+  ret <4 x float> %3
+}
+
+define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> , <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <8 x float> [[TMP2]]
+;
+  %1 = shufflevector <8 x i32> %a1, <8 x i32> , <8 x i32>
+  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
+  %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32>
+  ret <8 x float> %3
+}
+
+define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> %a1, i32 %a2, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <16 x float> [[TMP2]]
+;
+  %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0
+  %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1)
+  %3 = shufflevector <16 x float> %2, <16 x float> undef, <16 x i32>
+  ret <16 x float> %3
+}
+
+define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> , i64 %a1, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x i64> , i64 %a1, i32 1
+  %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1)
+  %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32>
+  ret <2 x double> %3
+}
+
+define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> , <4 x i64> %a1, <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> [[TMP1]])
+; CHECK-NEXT:    ret <4 x double> [[TMP2]]
+;
+  %1 = shufflevector <4 x i64> , <4 x i64> %a1, <4 x i32>
+  %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1)
+  %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32>
+  ret <4 x double> %3
+}
+
+define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i64> %a1, i64 %a2, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    ret <8 x double> [[TMP3]]
+;
+  %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0
+  %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1)
+  %3 = shufflevector <8 x double> %2, <8 x double> undef, <8 x i32> zeroinitializer
+  ret <8 x double> %3
+}
+
 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
 declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)