From: Simon Pilgrim
Date: Sat, 17 Sep 2016 17:40:40 +0000 (+0000)
Subject: [X86][XOP] Add target shuffle constant folding tests
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=aa105ad95d31b703aa5b73ffc534cac8dd8919db;p=llvm

[X86][XOP] Add target shuffle constant folding tests

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281828 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index ae6df60a199..4b760d11e02 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -260,3 +260,93 @@ define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
   %res3 = bitcast <16 x i8> %res2 to <4 x i32>
   ret <4 x i32> %res3
 }
+
+define <2 x double> @constant_fold_vpermil2pd() {
+; X32-LABEL: constant_fold_vpermil2pd:
+; X32:       # BB#0:
+; X32-NEXT:    vmovapd {{.*#+}} xmm0 = [-2.000000e+00,-1.000000e+00]
+; X32-NEXT:    vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
+; X32-NEXT:    vpermil2pd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; X32-NEXT:    retl
+;
+; X64-LABEL: constant_fold_vpermil2pd:
+; X64:       # BB#0:
+; X64-NEXT:    vmovapd {{.*#+}} xmm0 = [-2.000000e+00,-1.000000e+00]
+; X64-NEXT:    vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
+; X64-NEXT:    vpermil2pd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; X64-NEXT:    retq
+  %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> <double 1.0, double 2.0>, <2 x double> <double -2.0, double -1.0>, <2 x i64> <i64 4, i64 2>, i8 2)
+  ret <2 x double> %1
+}
+
+define <4 x double> @constant_fold_vpermil2pd_256() {
+; X32-LABEL: constant_fold_vpermil2pd_256:
+; X32:       # BB#0:
+; X32-NEXT:    vmovapd {{.*#+}} ymm0 = [-4.000000e+00,-3.000000e+00,-2.000000e+00,-1.000000e+00]
+; X32-NEXT:    vmovapd {{.*#+}} ymm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; X32-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0],zero,ymm1[3,2]
+; X32-NEXT:    retl
+;
+; X64-LABEL: constant_fold_vpermil2pd_256:
+; X64:       # BB#0:
+; X64-NEXT:    vmovapd {{.*#+}} ymm0 = [-4.000000e+00,-3.000000e+00,-2.000000e+00,-1.000000e+00]
+; X64-NEXT:    vmovapd {{.*#+}} ymm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; X64-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0],zero,ymm1[3,2]
+; X64-NEXT:    retq
+  %1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x double> <double -4.0, double -3.0, double -2.0, double -1.0>, <4 x i64> <i64 4, i64 8, i64 2, i64 0>, i8 2)
+  ret <4 x double> %1
+}
+
+define <4 x float> @constant_fold_vpermil2ps() {
+; X32-LABEL: constant_fold_vpermil2ps:
+; X32:       # BB#0:
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.000000e+00,-3.000000e+00,-2.000000e+00,-1.000000e+00]
+; X32-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; X32-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,2],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: constant_fold_vpermil2ps:
+; X64:       # BB#0:
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.000000e+00,-3.000000e+00,-2.000000e+00,-1.000000e+00]
+; X64-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; X64-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,2],zero
+; X64-NEXT:    retq
+  %1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float> <float -4.0, float -3.0, float -2.0, float -1.0>, <4 x i32> <i32 4, i32 0, i32 2, i32 8>, i8 2)
+  ret <4 x float> %1
+}
+
+define <8 x float> @constant_fold_vpermil2ps_256() {
+; X32-LABEL: constant_fold_vpermil2ps_256:
+; X32:       # BB#0:
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [-8.000000e+00,-7.000000e+00,-6.000000e+00,-5.000000e+00,-4.000000e+00,-3.000000e+00,-2.000000e+00,-1.000000e+00]
+; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00]
+; X32-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[0,2],zero,ymm1[4],zero,ymm1[4,6]
+; X32-NEXT:    retl
+;
+; X64-LABEL: constant_fold_vpermil2ps_256:
+; X64:       # BB#0:
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [-8.000000e+00,-7.000000e+00,-6.000000e+00,-5.000000e+00,-4.000000e+00,-3.000000e+00,-2.000000e+00,-1.000000e+00]
+; X64-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00]
+; X64-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[0,2],zero,ymm1[4],zero,ymm1[4,6]
+; X64-NEXT:    retq
+  %1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x float> <float -8.0, float -7.0, float -6.0, float -5.0, float -4.0, float -3.0, float -2.0, float -1.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 8, i32 0, i32 8, i32 0, i32 2>, i8 2)
+  ret <8 x float> %1
+}
+
+define <16 x i8> @constant_fold_vpperm() {
+; X32-LABEL: constant_fold_vpperm:
+; X32:       # BB#0:
+; X32-NEXT:    vmovdqa {{.*#+}} xmm0 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: constant_fold_vpperm:
+; X64:       # BB#0:
+; X64-NEXT:    vmovdqa {{.*#+}} xmm0 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X64-NEXT:    retq
+  %1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15>, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>)
+  ret <16 x i8> %1
+}
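Every selector operand in these calls is itself a constant build vector, which is what makes the shuffles candidates for constant folding. Taking @constant_fold_vpermil2pd as a worked example: selector element 0 is i64 4 (source bit 2 set, element bit 1 clear), picking element 0 of the second source (-2.0), and selector element 1 is i64 2 (bit 2 clear, bit 1 set), picking element 1 of the first source (2.0), which matches the decoded shuffle comment xmm0 = xmm0[0],xmm1[1]. Once the folding is implemented, each test should collapse to a single constant-pool load; a sketch of the expected X64 check lines for the first test, assuming the folded constant is still materialized with vmovapd:

; X64-LABEL: constant_fold_vpermil2pd:
; X64:       # BB#0:
; X64-NEXT:    vmovapd {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; X64-NEXT:    retq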