[X86] Teach shuffle lowering to use MOVLHPS/MOVHLPS for lowering v4f32 unary shuffles...

author Craig Topper <craig.topper@intel.com>

Sun, 17 Sep 2017 22:36:41 +0000 (22:36 +0000)

committer Craig Topper <craig.topper@intel.com>

Sun, 17 Sep 2017 22:36:41 +0000 (22:36 +0000)
author Craig Topper <craig.topper@intel.com>
Sun, 17 Sep 2017 22:36:41 +0000 (22:36 +0000)
committer Craig Topper <craig.topper@intel.com>
Sun, 17 Sep 2017 22:36:41 +0000 (22:36 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 4df7621bee2bb0ac0afed266271364fef49a6fed..d327c98be69302ad48bad26df7ebd58b24bc67ab 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10725,6 +10725,15 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                           getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
      }
  
+    // Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid
+    // in SSE1 because otherwise they are widened to v2f64 and never get here.
+    if (!Subtarget.hasSSE2()) {
+      if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}))
+        return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
+      if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 2, 3}))
+        return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
+    }
+
      // Otherwise, use a straight shuffle of a single input vector. We pass the
      // input vector to both operands to simulate this with a SHUFPS.
      return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
@@ -10757,11 +10766,14 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
          return BlendPerm;
    }
  
-  // Use low/high mov instructions.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
-    return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
-    return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
+  // Use low/high mov instructions. These are only valid in SSE1 because
+  // otherwise they are widened to v2f64 and never get here.
+  if (!Subtarget.hasSSE2()) {
+    if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
+      return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
+    if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
+      return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
+  }
  
    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V =
diff --git a/test/CodeGen/X86/vector-shuffle-sse1.ll b/test/CodeGen/X86/vector-shuffle-sse1.ll

index 22b5102c06f76d77087602c6d5a93fd43ec6accd..cf8e8eb8a12178c009633ba5666ab09faa52b0da 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-sse1.ll
+++ b/test/CodeGen/X86/vector-shuffle-sse1.ll
@@ -112,7 +112,7 @@ define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
  define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) {
  ; SSE1-LABEL: shuffle_v4f32_0101:
  ; SSE1:       # BB#0:
-; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
  ; SSE1-NEXT:    retq
    %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    ret <4 x float> %shuffle
@@ -121,7 +121,7 @@ define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) {
  define <4 x float> @shuffle_v4f32_2323(<4 x float> %a, <4 x float> %b) {
  ; SSE1-LABEL: shuffle_v4f32_2323:
  ; SSE1:       # BB#0:
-; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE1-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
  ; SSE1-NEXT:    retq
    %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
    ret <4 x float> %shuffle
author	Craig Topper <craig.topper@intel.com>
	Sun, 17 Sep 2017 22:36:41 +0000 (22:36 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sun, 17 Sep 2017 22:36:41 +0000 (22:36 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/vector-shuffle-sse1.ll		patch | blob | history