From c4c427ffea861bf35955fe653827bc51bca1846c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2019 14:04:19 +0000 Subject: [PATCH] [X86] SimplifyDemandedVectorElts - add X86ISD::VPERMIL2 mask support git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358167 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- .../X86/vector-shuffle-combining-xop.ll | 24 ++++++------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 223d0923c7f..a896f24c365 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33234,8 +33234,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return true; break; } - case X86ISD::VPPERM: { - // TODO - simplify other variable shuffle masks. + case X86ISD::VPPERM: + case X86ISD::VPERMIL2: { SDValue Mask = Op.getOperand(2); APInt MaskUndef, MaskZero; if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO, diff --git a/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 3d57a09ffb5..e785530ccd5 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -153,23 +153,13 @@ define <4 x double> @demandedelts_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x ; X86-AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3] ; X86-AVX2-NEXT: retl ; -; X64-AVX-LABEL: demandedelts_vpermil2pd256_as_shufpd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = -; X64-AVX-NEXT: vpinsrq $0, %rdi, %xmm2, %xmm2 -; X64-AVX-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],mem[2,3] -; X64-AVX-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 -; X64-AVX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3] -; X64-AVX-NEXT: retq -; -; X64-AVX2-LABEL: demandedelts_vpermil2pd256_as_shufpd: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = -; X64-AVX2-NEXT: vpinsrq $0, %rdi, %xmm2, %xmm2 -; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],mem[4,5,6,7] -; X64-AVX2-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3] -; X64-AVX2-NEXT: retq +; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd: +; X64: # %bb.0: +; X64-NEXT: vmovapd {{.*#+}} xmm2 = +; X64-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],mem[2,3] +; X64-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3] +; X64-NEXT: retq %res0 = insertelement <4 x i64> , i64 %a2, i32 0 %res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0) %res2 = shufflevector <4 x double> %res1, <4 x double> undef, <4 x i32> -- 2.40.0