bool combineIncDecVector(SDNode *Node);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
+ bool tryMatchBitSelect(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
return true;
}
+// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
+// into vpternlog.
+bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
+
+ MVT NVT = N->getSimpleValueType(0);
+
+ // Make sure we support VPTERNLOG.
+ if (!NVT.isVector() || !Subtarget->hasAVX512())
+ return false;
+
+ // We need VLX for 128/256-bit.
+ if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Canonicalize AND to LHS.
+ if (N1.getOpcode() == ISD::AND)
+ std::swap(N0, N1);
+
+ if (N0.getOpcode() != ISD::AND ||
+ N1.getOpcode() != X86ISD::ANDNP ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return false;
+
+  // ANDN is not commutable, so use it to pin down A and C.
+ SDValue A = N1.getOperand(0);
+ SDValue C = N1.getOperand(1);
+
+  // AND is commutable; if one of its operands matches A, the other is B.
+  // Otherwise this isn't a match.
+ SDValue B;
+ if (N0.getOperand(0) == A)
+ B = N0.getOperand(1);
+ else if (N0.getOperand(1) == A)
+ B = N0.getOperand(0);
+ else
+ return false;
+
+ SDLoc dl(N);
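+  // Immediate 0xCA (202) encodes the ternary function A ? B : C, i.e. the
+  // bitselect (A & B) | (~A & C) matched above.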
+ SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
+ SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
+ ReplaceNode(N, Ternlog.getNode());
+ SelectCode(Ternlog.getNode());
+ return true;
+}
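Why 0xCA: VPTERNLOG treats the immediate as a 3-input truth table, where the result bit for inputs (a, b, c) drawn from src1/src2/src3 is bit ((a << 2) | (b << 1) | c) of the immediate, so 0xCA is exactly the bitselect (A & B) | (~A & C). A minimal standalone check of that correspondence (not part of the patch; ternlogBit is a hypothetical helper used only for illustration):

#include <cassert>
#include <cstdint>

// Evaluate a ternary-logic immediate the way VPTERNLOG does: the result for
// inputs (a, b, c) is the immediate bit at index (a << 2) | (b << 1) | c.
static uint8_t ternlogBit(uint8_t Imm, unsigned A, unsigned B, unsigned C) {
  return (Imm >> ((A << 2) | (B << 1) | C)) & 1;
}

int main() {
  // All eight input combinations: 0xCA must agree with (A & B) | (~A & C).
  for (unsigned A = 0; A < 2; ++A)
    for (unsigned B = 0; B < 2; ++B)
      for (unsigned C = 0; C < 2; ++C)
        assert(ternlogBit(0xCA, A, B, C) == ((A & B) | (~A & C & 1u)));
  return 0;
}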
+
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opcode = Node->getOpcode();
if (tryShrinkShlLogicImm(Node))
return;
+ if (Opcode == ISD::OR && tryMatchBitSelect(Node))
+ return;
+
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
avx512vl_i64_info>, VEX_W;
+// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (loadv16i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
+ VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (loadv8i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
+ VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (loadv32i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
+ VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (loadv16i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
+ VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+}
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+ (loadv64i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
+ VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (loadv32i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
+ VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+}
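A note on why the commuted-load patterns above permute the immediate: only src3 has a memory form, so when the load shows up as src1 or src2 the pattern swaps registers into place and must relabel the truth-table inputs to match. VPTERNLOG321_imm8 corresponds to exchanging src1 and src3 (swapping immediate bits 1/4 and 3/6), and VPTERNLOG132_imm8 to exchanging src2 and src3 (bits 1/2 and 5/6). Below is a standalone sketch of the underlying bit permutation (illustrative only; permuteTernlogImm is a made-up helper, while the in-tree transforms are SDNodeXForms):

#include <cstdint>
#include <cstdio>

// Recompute a VPTERNLOG immediate after permuting the three sources. The
// result bit for inputs (a, b, c) lives at index (a << 2) | (b << 1) | c,
// so exchanging two sources is a fixed permutation of the immediate's bits.
static uint8_t permuteTernlogImm(uint8_t Imm, bool Swap13) {
  uint8_t New = 0;
  for (unsigned A = 0; A < 2; ++A)
    for (unsigned B = 0; B < 2; ++B)
      for (unsigned C = 0; C < 2; ++C) {
        unsigned OldIdx = Swap13 ? ((C << 2) | (B << 1) | A)   // src1 <-> src3
                                 : ((A << 2) | (C << 1) | B);  // src2 <-> src3
        New |= ((Imm >> OldIdx) & 1) << ((A << 2) | (B << 1) | C);
      }
  return New;
}

int main() {
  // Bitselect 0xCA becomes 0xd8 under the 321 swap and 0xac under 132.
  printf("321: %#x  132: %#x\n", permuteTernlogImm(0xCA, true),
         permuteTernlogImm(0xCA, false));
  return 0;
}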
+
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to avoid a false dependency.
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
-let Predicates = [HasVLX] in {
- def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
- def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
- def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
- def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-
- def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
- def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
- def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
- def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-}
-
-let Predicates = [HasAVX512] in {
- def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
- def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
- def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
- def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-}
-
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
;
; AVX512F-LABEL: bitselect_v8i64_mm:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
-; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm0
-; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm1
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
+; AVX512F-NEXT: vpternlogq $202, (%rdi), %zmm1, %zmm0
; AVX512F-NEXT: retq
%3 = load <8 x i64>, <8 x i64>* %0
%4 = load <8 x i64>, <8 x i64>* %1
; CHECK-LABEL: v4f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: vpternlogd $226, %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq
%tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b )
ret <4 x float> %tmp
; CHECK-LABEL: v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: vpternlogd $226, %ymm1, %ymm2, %ymm0
; CHECK-NEXT: retq
%tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b )
ret <8 x float> %tmp
; CHECK-LABEL: v16f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: vpternlogd $226, %zmm1, %zmm2, %zmm0
; CHECK-NEXT: retq
%tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b )
ret <16 x float> %tmp
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VLBW-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VLBW-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
;
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
;
; AVX512VL-LABEL: var_rotate_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
;
; AVX512VL-LABEL: splatconstant_rotate_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v32i8:
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
;