From 2e1720f4556e47b98353c3a901913d0f1ba54944 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 2 Jul 2016 12:45:03 +0000 Subject: [PATCH] [X86][AVX512] Add support for lowering shuffles to MOVDDUP/MOVSLDUP/MOVSHDUP git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274436 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++++++++++ test/CodeGen/X86/vector-shuffle-512-v16.ll | 6 ++---- test/CodeGen/X86/vector-shuffle-512-v8.ll | 6 ++---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4417d42088c..b06736fbdd3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11716,6 +11716,12 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); + if (V2.isUndef()) { + // Use low duplicate instructions for masks that match their pattern. + if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6})) + return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1); + } + if (SDValue Shuf128 = lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Shuf128; @@ -11736,6 +11742,19 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef Mask, assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); + // If the shuffle mask is repeated in each 128-bit lane, we have many more + // options to efficiently lower the shuffle. + SmallVector RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) { + assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); + + // Use even/odd duplicate instructions for masks that match their pattern. + if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2})) + return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1); + if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3})) + return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1); + } + if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) return Unpck; diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll index 51855780f58..5f01909eb31 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -92,8 +92,7 @@ define <16 x float> @shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz define <16 x float> @shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x float> %a, <16 x float> %b) { ; ALL-LABEL: shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14: ; ALL: # BB#0: -; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] -; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; ALL-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; ALL-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> ret <16 x float> %shuffle @@ -102,8 +101,7 @@ define <16 x float> @shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14 define <16 x float> @shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x float> %a, <16 x float> %b) { ; ALL-LABEL: shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15: ; ALL: # BB#0: -; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] -; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; ALL-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; ALL-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> ret <16 x float> %shuffle diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll index fde9fb40a58..affd6bdb67b 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -571,14 +571,12 @@ define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_00224466: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6] -; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00224466: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0] -; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6] ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> ret <8 x double> %shuffle -- 2.49.0