const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
- EVT VT = N->getValueType(0);
+ MVT VT = N->getSimpleValueType(0);
if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
return SDValue();
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
return SDValue();
- // On XOP we'll lower to PCMOV so accept one use, otherwise only
- // do this if either mask has multiple uses already.
- if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() ||
- !N1.getOperand(1).hasOneUse()))
+ // On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use
+ // VPTERNLOG. Otherwise only do this if either mask has multiple uses already.
+ bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) ||
+ Subtarget.hasVLX();
+ if (!(Subtarget.hasXOP() || UseVPTERNLOG ||
+ !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
return SDValue();
// Attempt to extract constant byte masks.
;
; AVX512F-LABEL: bitselect_v8i64_rr:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
%3 = and <8 x i64> %0, <i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890, i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890>
%4 = and <8 x i64> %1, <i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891>
; AVX512F-LABEL: bitselect_v8i64_rm:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
%3 = load <8 x i64>, <8 x i64>* %1
%4 = and <8 x i64> %0, <i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3>
; AVX512F-LABEL: bitselect_v8i64_mr:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
%3 = load <8 x i64>, <8 x i64>* %0
%4 = and <8 x i64> %3, <i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296>
;
; AVX512F-LABEL: bitselect_v8i64_mm:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
-; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm1
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
+; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm0
+; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
%3 = load <8 x i64>, <8 x i64>* %0
%4 = load <8 x i64>, <8 x i64>* %1
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.10.0 -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VLDQ
define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
-; AVX512VL-LABEL: v4f32:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512VLDQ-LABEL: v4f32:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512VLDQ-NEXT: retq
+; CHECK-LABEL: v4f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: retq
%tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b )
ret <4 x float> %tmp
}
define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
-; AVX512VL-LABEL: v8f32:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1
-; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
-;
-; AVX512VLDQ-LABEL: v8f32:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm1, %ymm1
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: retq
+; CHECK-LABEL: v8f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: retq
%tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b )
ret <8 x float> %tmp
}
define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind {
-; AVX512VL-LABEL: v16f32:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
-; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512VLDQ-LABEL: v16f32:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm1, %zmm1
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512VLDQ-NEXT: vorps %zmm1, %zmm0, %zmm0
-; AVX512VLDQ-NEXT: retq
+; CHECK-LABEL: v16f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
%tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b )
ret <16 x float> %tmp
}
define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
; CHECK-LABEL: v2f64:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
; CHECK-NEXT: retq
%tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b )
ret <2 x double> %tmp
}
define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
-; AVX512VL-LABEL: v4f64:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1
-; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
-;
-; AVX512VLDQ-LABEL: v4f64:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm1, %ymm1
-; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vorpd %ymm1, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: retq
+; CHECK-LABEL: v4f64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: retq
%tmp = tail call <4 x double> @llvm.copysign.v4f64( <4 x double> %a, <4 x double> %b )
ret <4 x double> %tmp
}
define <8 x double> @v8f64(<8 x double> %a, <8 x double> %b) nounwind {
-; AVX512VL-LABEL: v8f64:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512VLDQ-LABEL: v8f64:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512VLDQ-NEXT: vorpd %zmm1, %zmm0, %zmm0
-; AVX512VLDQ-NEXT: retq
+; CHECK-LABEL: v8f64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
%tmp = tail call <8 x double> @llvm.copysign.v8f64( <8 x double> %a, <8 x double> %b )
ret <8 x double> %tmp
}
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
; XOP: # %bb.0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v32i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
; XOP: # %bb.0:
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v32i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
; XOP: # %bb.0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v32i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
; XOP: # %bb.0:
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
-; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vpsubb %ymm1, %ymm3, %ymm1
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v32i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_rotate_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_rotate_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_rotate_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_rotate_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_rotate_v16i8:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_v16i8:
; XOP: # %bb.0:
; AVX512VL-LABEL: var_rotate_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512VL-LABEL: splatconstant_rotate_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v32i8:
; AVX512VLBW-LABEL: splatconstant_rotate_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_v32i8:
; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>