From 1f42623bce9db9e4ca7989a5eac231c9e9d0a0ef Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 17 Sep 2017 22:36:41 +0000 Subject: [PATCH] [X86] Teach shuffle lowering to use MOVLHPS/MOVHLPS for lowering v4f32 unary shuffles with SSE1 only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313504 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 22 +++++++++++++++++----- test/CodeGen/X86/vector-shuffle-sse1.ll | 4 ++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4df7621bee2..d327c98be69 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10725,6 +10725,15 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } + // Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid + // in SSE1 because otherwise they are widened to v2f64 and never get here. + if (!Subtarget.hasSSE2()) { + if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1})) + return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1); + if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 2, 3})) + return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1); + } + // Otherwise, use a straight shuffle of a single input vector. We pass the // input vector to both operands to simulate this with a SHUFPS. return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1, @@ -10757,11 +10766,14 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, return BlendPerm; } - // Use low/high mov instructions. - if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) - return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7})) - return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1); + // Use low/high mov instructions. 
These are only valid in SSE1 because + // otherwise they are widened to v2f64 and never get here. + if (!Subtarget.hasSSE2()) { + if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) + return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7})) + return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1); + } // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = diff --git a/test/CodeGen/X86/vector-shuffle-sse1.ll b/test/CodeGen/X86/vector-shuffle-sse1.ll index 22b5102c06f..cf8e8eb8a12 100644 --- a/test/CodeGen/X86/vector-shuffle-sse1.ll +++ b/test/CodeGen/X86/vector-shuffle-sse1.ll @@ -112,7 +112,7 @@ define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) { define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) { ; SSE1-LABEL: shuffle_v4f32_0101: ; SSE1: # BB#0: -; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE1-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ret <4 x float> %shuffle @@ -121,7 +121,7 @@ define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) { define <4 x float> @shuffle_v4f32_2323(<4 x float> %a, <4 x float> %b) { ; SSE1-LABEL: shuffle_v4f32_2323: ; SSE1: # BB#0: -; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,2,3] +; SSE1-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE1-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 2, i32 3> ret <4 x float> %shuffle -- 2.40.0