From 52bde850c7f6a19a34019a1a6b73e0106364fe3f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 29 Sep 2019 01:24:29 +0000
Subject: [PATCH] [X86] Move bitselect matching to vpternlog into
 X86ISelDAGToDAG.cpp

This allows us to reduce the use count on the condition node before
the match. This enables load folding for that operand without
relying on the peephole pass. This will be improved on for
broadcast load folding in a subsequent commit.

This still requires a bunch of isel patterns for vXi16/vXi8 types
though.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373156 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp      |  53 +++++++++
 lib/Target/X86/X86InstrAVX512.td        | 150 +++++++++++++++++-------
 test/CodeGen/X86/combine-bitselect.ll   |   6 +-
 test/CodeGen/X86/vec-copysign-avx512.ll |   6 +-
 test/CodeGen/X86/vector-fshl-128.ll     |  18 +--
 test/CodeGen/X86/vector-fshl-256.ll     |  18 +--
 test/CodeGen/X86/vector-fshl-512.ll     |  24 ++--
 test/CodeGen/X86/vector-fshl-rot-128.ll |  12 +-
 test/CodeGen/X86/vector-fshl-rot-256.ll |  24 ++--
 test/CodeGen/X86/vector-fshl-rot-512.ll |  12 +-
 test/CodeGen/X86/vector-fshr-128.ll     |  18 +--
 test/CodeGen/X86/vector-fshr-256.ll     |  18 +--
 test/CodeGen/X86/vector-fshr-512.ll     |  24 ++--
 test/CodeGen/X86/vector-fshr-rot-128.ll |  12 +-
 test/CodeGen/X86/vector-fshr-rot-256.ll |  24 ++--
 test/CodeGen/X86/vector-fshr-rot-512.ll |  12 +-
 test/CodeGen/X86/vector-rotate-256.ll   |  24 ++--
 17 files changed, 286 insertions(+), 169 deletions(-)

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 83c1251265f..cb1c7d3c339 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -510,6 +510,7 @@ namespace {
     bool combineIncDecVector(SDNode *Node);
     bool tryShrinkShlLogicImm(SDNode *N);
     bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
+    bool tryMatchBitSelect(SDNode *N);
 
     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                 const SDLoc &dl, MVT VT, SDNode *Node);
@@ -4275,6 +4276,55 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
   return true;
 }
 
+// Match the bitselect pattern (or (and A, B), (andn A, C)), i.e. A ? B : C per
+// bit, and select it as VPTERNLOG with immediate 0xCA. Returns true on match.
+bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
+  assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
+
+  MVT NVT = N->getSimpleValueType(0);
+
+  // VPTERNLOG is only available for vector types with AVX512.
+  if (!NVT.isVector() || !Subtarget->hasAVX512())
+    return false;
+
+  // Without VLX only 512-bit vectors can be handled.
+  if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+    return false;
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Canonicalize the AND to the LHS so the ANDN is expected on the RHS.
+  if (N1.getOpcode() == ISD::AND)
+    std::swap(N0, N1);
+
+  if (N0.getOpcode() != ISD::AND ||
+      N1.getOpcode() != X86ISD::ANDNP ||
+      !N0.hasOneUse() || !N1.hasOneUse())
+    return false;
+
+  // ANDN is not commutable, so use it to pin down A and C.
+  SDValue A = N1.getOperand(0);
+  SDValue C = N1.getOperand(1);
+
+  // AND is commutable: if either operand matches A, the other operand is B.
+  // Otherwise this isn't a bitselect.
+  SDValue B;
+  if (N0.getOperand(0) == A)
+    B = N0.getOperand(1);
+  else if (N0.getOperand(1) == A)
+    B = N0.getOperand(0);
+  else
+    return false;
+
+  SDLoc dl(N);
+  SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); // A ? B : C
+  SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
+  ReplaceNode(N, Ternlog.getNode());
+  SelectCode(Ternlog.getNode()); // Select the replacement node right away.
+  return true;
+}
+
 void X86DAGToDAGISel::Select(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   unsigned Opcode = Node->getOpcode();
@@ -4433,6 +4483,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     if (tryShrinkShlLogicImm(Node))
       return;
 
+    if (Opcode == ISD::OR && tryMatchBitSelect(Node))
+      return;
+
     LLVM_FALLTHROUGH;
   case ISD::ADD:
   case ISD::SUB: {
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 60bcf3e2dfd..18c95a631c9 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -11436,6 +11436,113 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                         avx512vl_i64_info>, VEX_W;
 
+// Select vXi8/vXi16 X86vpternlog nodes as VPTERNLOGQ, with load-folding forms.
+let Predicates = [HasVLX] in { // 128-bit and 256-bit vectors.
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+                               timm:$src4)>;
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (loadv16i8 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               timm:$src4)>;
+  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
+                                 VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+                               timm:$src4)>;
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (loadv8i16 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               timm:$src4)>;
+  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
+                                 VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+                               timm:$src4)>;
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                 (loadv32i8 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               timm:$src4)>;
+  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
+                                 VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
+                                 VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+                                  (i8 timm:$src4))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+                               timm:$src4)>;
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                  (loadv16i16 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               timm:$src4)>;
+  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
+                                  VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
+                                  VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+} // Predicates = [HasVLX]
+
+let Predicates = [HasAVX512] in { // 512-bit v64i8/v32i16 via VPTERNLOGQZ.
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+                            timm:$src4)>;
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+                                 (loadv64i8 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+                            timm:$src4)>;
+  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
+                                  VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+                            (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
+                                 VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+                            (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+                                  (i8 timm:$src4))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+                            timm:$src4)>;
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+                                  (loadv32i16 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+                            timm:$src4)>;
+  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
+                                  VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+                            (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
+                                 VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+                            (VPTERNLOG132_imm8 timm:$src4))>;
+} // Predicates = [HasAVX512]
+
 // Patterns to implement vnot using vpternlog instead of creating all ones
 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
 // so that the result is only dependent on src0. But we use the same source
@@ -11533,49 +11640,6 @@ let Predicates = [HasVLX] in {
             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
 }
 
-let Predicates = [HasVLX] in {
-  def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-  def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-  def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-  def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-
-  def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
-                       (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-  def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
-                        (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-  def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
-                       (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-  def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
-                       (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-}
-
-let Predicates = [HasAVX512] in {
-  def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
-                       (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-  def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
-                        (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-  def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
-                        (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-  def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
-                       (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-}
-
 //===----------------------------------------------------------------------===//
 // AVX-512 - FixupImm
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/combine-bitselect.ll b/test/CodeGen/X86/combine-bitselect.ll
index 743cde84317..ccb969b747f 100644
--- a/test/CodeGen/X86/combine-bitselect.ll
+++ b/test/CodeGen/X86/combine-bitselect.ll
@@ -548,9 +548,9 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n
 ;
 ; AVX512F-LABEL: bitselect_v8i64_mm:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa64 (%rdi), %zmm1
-; AVX512F-NEXT:    vmovdqa64 (%rsi), %zmm0
-; AVX512F-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512F-NEXT:    vmovdqa64 (%rsi), %zmm1
+; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
+; AVX512F-NEXT:    vpternlogq $202, (%rdi), %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
   %3 = load <8 x i64>, <8 x i64>* %0
   %4 = load <8 x i64>, <8 x i64>* %1
diff --git a/test/CodeGen/X86/vec-copysign-avx512.ll b/test/CodeGen/X86/vec-copysign-avx512.ll
index 13e2e12c928..5ec547f1db8 100644
--- a/test/CodeGen/X86/vec-copysign-avx512.ll
+++ b/test/CodeGen/X86/vec-copysign-avx512.ll
@@ -6,7 +6,7 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-LABEL: v4f32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT:    vpternlogq $226, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT:    vpternlogd $226, %xmm1, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
   %tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b )
   ret <4 x float> %tmp
@@ -16,7 +16,7 @@ define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
 ; CHECK-LABEL: v8f32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT:    vpternlogq $226, %ymm1, %ymm2, %ymm0
+; CHECK-NEXT:    vpternlogd $226, %ymm1, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
   %tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b )
   ret <8 x float> %tmp
@@ -26,7 +26,7 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind {
 ; CHECK-LABEL: v16f32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT:    vpternlogq $226, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vpternlogd $226, %zmm1, %zmm2, %zmm0
 ; CHECK-NEXT:    retq
   %tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b )
   ret <16 x float> %tmp
diff --git a/test/CodeGen/X86/vector-fshl-128.ll b/test/CodeGen/X86/vector-fshl-128.ll
index 5530b9920d4..12a5f2bc2cc 100644
--- a/test/CodeGen/X86/vector-fshl-128.ll
+++ b/test/CodeGen/X86/vector-fshl-128.ll
@@ -2991,9 +2991,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %xmm1, %xmm1
-; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm2
+; AVX512VL-NEXT:    vpsrlw $4, %xmm1, %xmm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -3016,16 +3016,16 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %xmm1, %xmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm2
+; AVX512VLBW-NEXT:    vpsrlw $4, %xmm1, %xmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VLVBMI2:       # %bb.0:
-; AVX512VLVBMI2-NEXT:    vpsrlw $4, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT:    vpsllw $4, %xmm0, %xmm2
+; AVX512VLVBMI2-NEXT:    vpsrlw $4, %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
 ; AVX512VLVBMI2-NEXT:    retq
 ;
 ; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/test/CodeGen/X86/vector-fshl-256.ll b/test/CodeGen/X86/vector-fshl-256.ll
index ed5ebcde68e..cf8a80cf9db 100644
--- a/test/CodeGen/X86/vector-fshl-256.ll
+++ b/test/CodeGen/X86/vector-fshl-256.ll
@@ -2514,9 +2514,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -2539,16 +2539,16 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VLBW-NEXT:    vpsrlw $4, %ymm1, %ymm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VLVBMI2:       # %bb.0:
-; AVX512VLVBMI2-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT:    vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
 ; AVX512VLVBMI2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/test/CodeGen/X86/vector-fshl-512.ll b/test/CodeGen/X86/vector-fshl-512.ll
index 0f5558d7d5f..b6c5d9f744e 100644
--- a/test/CodeGen/X86/vector-fshl-512.ll
+++ b/test/CodeGen/X86/vector-fshl-512.ll
@@ -1559,30 +1559,30 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VBMI2:       # %bb.0:
-; AVX512VBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VBMI2-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512VBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512VBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512VBMI2-NEXT:    retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512VLBW-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VLVBMI2:       # %bb.0:
-; AVX512VLVBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512VLVBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512VLVBMI2-NEXT:    retq
   %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
   ret <64 x i8> %res
diff --git a/test/CodeGen/X86/vector-fshl-rot-128.ll b/test/CodeGen/X86/vector-fshl-rot-128.ll
index ff56dddd383..ce521ad8896 100644
--- a/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -1846,9 +1846,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %xmm0, %xmm1
-; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm1
+; AVX512VL-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -1862,9 +1862,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %xmm0, %xmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/test/CodeGen/X86/vector-fshl-rot-256.ll b/test/CodeGen/X86/vector-fshl-rot-256.ll
index be141995329..ca624b0a82e 100644
--- a/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -436,14 +436,14 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
 ;
 ; AVX512VL-LABEL: var_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
 ; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
@@ -1504,9 +1504,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -1520,9 +1520,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/test/CodeGen/X86/vector-fshl-rot-512.ll b/test/CodeGen/X86/vector-fshl-rot-512.ll
index 94c0be32bc8..8cb0f36a176 100644
--- a/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -829,16 +829,16 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm1
+; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512VLBW-NEXT:    retq
   %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
   ret <64 x i8> %res
diff --git a/test/CodeGen/X86/vector-fshr-128.ll b/test/CodeGen/X86/vector-fshr-128.ll
index 253826976db..00f5d73a4de 100644
--- a/test/CodeGen/X86/vector-fshr-128.ll
+++ b/test/CodeGen/X86/vector-fshr-128.ll
@@ -3012,9 +3012,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %xmm1, %xmm1
-; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm2
+; AVX512VL-NEXT:    vpsrlw $4, %xmm1, %xmm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -3037,16 +3037,16 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %xmm1, %xmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm2
+; AVX512VLBW-NEXT:    vpsrlw $4, %xmm1, %xmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VLVBMI2:       # %bb.0:
-; AVX512VLVBMI2-NEXT:    vpsrlw $4, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT:    vpsllw $4, %xmm0, %xmm2
+; AVX512VLVBMI2-NEXT:    vpsrlw $4, %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
 ; AVX512VLVBMI2-NEXT:    retq
 ;
 ; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/test/CodeGen/X86/vector-fshr-256.ll b/test/CodeGen/X86/vector-fshr-256.ll
index 918270dc668..8898373bfe8 100644
--- a/test/CodeGen/X86/vector-fshr-256.ll
+++ b/test/CodeGen/X86/vector-fshr-256.ll
@@ -2515,9 +2515,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -2540,16 +2540,16 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VLBW-NEXT:    vpsrlw $4, %ymm1, %ymm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VLVBMI2:       # %bb.0:
-; AVX512VLVBMI2-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT:    vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
 ; AVX512VLVBMI2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/test/CodeGen/X86/vector-fshr-512.ll b/test/CodeGen/X86/vector-fshr-512.ll
index 748aa84974d..ca559a6911a 100644
--- a/test/CodeGen/X86/vector-fshr-512.ll
+++ b/test/CodeGen/X86/vector-fshr-512.ll
@@ -1543,30 +1543,30 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VBMI2:       # %bb.0:
-; AVX512VBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VBMI2-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512VBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512VBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512VBMI2-NEXT:    retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512VLBW-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VLVBMI2:       # %bb.0:
-; AVX512VLVBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT:    vpsllw $4, %zmm0, %zmm2
+; AVX512VLVBMI2-NEXT:    vpsrlw $4, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
 ; AVX512VLVBMI2-NEXT:    retq
   %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
   ret <64 x i8> %res
diff --git a/test/CodeGen/X86/vector-fshr-rot-128.ll b/test/CodeGen/X86/vector-fshr-rot-128.ll
index f3918daae5e..d88a2a214ca 100644
--- a/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -1928,9 +1928,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %xmm0, %xmm1
-; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm1
+; AVX512VL-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -1944,9 +1944,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %xmm0, %xmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/test/CodeGen/X86/vector-fshr-rot-256.ll b/test/CodeGen/X86/vector-fshr-rot-256.ll
index 4f940f464b3..bf7c057965b 100644
--- a/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -475,16 +475,16 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
 ;
 ; AVX512VL-LABEL: var_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
 ; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpsubb %ymm1, %ymm2, %ymm1
 ; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
@@ -1582,9 +1582,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -1598,9 +1598,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT:    vpsllw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
 ; AVX512VLBW-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/test/CodeGen/X86/vector-fshr-rot-512.ll b/test/CodeGen/X86/vector-fshr-rot-512.ll
index 33b681861aa..3838dfd4dd1 100644
--- a/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -849,16 +849,16 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm1
+; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512VLBW-NEXT:    retq
   %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
   ret <64 x i8> %res
diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll
index 1b7555cebdf..df76a7738f8 100644
--- a/test/CodeGen/X86/vector-rotate-256.ll
+++ b/test/CodeGen/X86/vector-rotate-256.ll
@@ -432,14 +432,14 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; AVX512VL-LABEL: var_rotate_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
 ; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
@@ -1505,9 +1505,9 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
 ;
 ; AVX512VL-LABEL: splatconstant_rotate_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_rotate_v32i8:
@@ -1787,9 +1787,9 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind {
 ;
 ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-- 
2.50.1