(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
+let Predicates = [HasVLX] in {
+  def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+  def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+  def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+  def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+
+  def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
+                       (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+  def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
+                        (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+  def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
+                       (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+  def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
+                       (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+}
+
+let Predicates = [HasAVX512] in {
+  def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
+                       (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+  def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
+                        (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+  def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
+                        (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+  def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
+                       (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+}
+
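For reference: VPTERNLOG evaluates an arbitrary three-input boolean function per bit, and the result bit is imm8 bit ((src1<<2)|(src2<<1)|src3). Immediate 202 (0xCA) is therefore the truth table of the bitwise select (src1 & src2) | (~src1 & src3), which is exactly the (or (and ...), (X86andnp ...)) DAG the patterns above match. A minimal standalone C++ sketch (not part of the patch) verifying that encoding:

#include <cassert>
#include <cstdint>

// One bit of VPTERNLOG: the three source bits index into the imm8
// truth table. Operand order: a = src1 (also the dst), b = src2, c = src3.
static int ternlog(int a, int b, int c, uint8_t imm) {
  return (imm >> ((a << 2) | (b << 1) | c)) & 1;
}

int main() {
  // 202 = 0xCA: bitwise select, result = src1 ? src2 : src3.
  for (int a = 0; a < 2; ++a)
    for (int b = 0; b < 2; ++b)
      for (int c = 0; c < 2; ++c)
        assert(ternlog(a, b, c, 202) == (a ? b : c));
  return 0;
}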
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
-; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
-; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm4, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpandn %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm4, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: retq
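The regenerated CHECK lines show immediate 226 (0xE2) rather than 202 because instruction selection commutes the first two ternlog operands, apparently so the destination can overwrite one of the shifted values; the mask register ends up as src2, and swapping src1 and src2 in the truth table maps 0xCA to 0xE2, i.e. result = src2 ? src1 : src3. A small sketch (again not from the patch) of that operand swap:

#include <cassert>
#include <cstdint>

// Rebuild a ternlog imm8 with operands src1 and src2 swapped:
// the new bit at index (a,b,c) is the old bit at index (b,a,c).
static uint8_t swapSrc1Src2(uint8_t imm) {
  uint8_t out = 0;
  for (int idx = 0; idx < 8; ++idx) {
    int a = (idx >> 2) & 1, b = (idx >> 1) & 1, c = idx & 1;
    out |= ((imm >> ((b << 2) | (a << 1) | c)) & 1) << idx;
  }
  return out;
}

int main() {
  // Select with src1/src2 commuted is 0xE2 (226), matching the
  // vpternlogq $226 in the updated CHECK lines.
  assert(swapSrc1Src2(202) == 226);
  return 0;
}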
;
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm4, %ymm5, %ymm4
-; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm6
-; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
-; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VL-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm6, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; AVX512VL-NEXT: vpandn %ymm4, %ymm7, %ymm4
-; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm8
-; AVX512VL-NEXT: vpand %ymm7, %ymm8, %ymm8
-; AVX512VL-NEXT: vpor %ymm4, %ymm8, %ymm4
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm5
+; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm7
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512VL-NEXT: vpternlogq $226, %ymm5, %ymm8, %ymm7
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm7, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm9
-; AVX512VL-NEXT: vpor %ymm4, %ymm9, %ymm4
+; AVX512VL-NEXT: vpor %ymm5, %ymm9, %ymm5
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpandn %ymm3, %ymm5, %ymm3
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
-; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT: vpand %ymm6, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm5
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm5
+; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT: vpandn %ymm3, %ymm7, %ymm3
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
-; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm8, %ymm4
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand %ymm8, %ymm3, %ymm3
+; AVX512VL-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
-; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
-; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm4, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpandn %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm4, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm4, %ymm5, %ymm4
-; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm6
-; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
-; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
-; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; AVX512VL-NEXT: vpsubb %ymm2, %ymm6, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm6, %ymm5
+; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpsubb %ymm2, %ymm4, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; AVX512VL-NEXT: vpandn %ymm4, %ymm8, %ymm4
-; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm9
-; AVX512VL-NEXT: vpand %ymm8, %ymm9, %ymm9
-; AVX512VL-NEXT: vpor %ymm4, %ymm9, %ymm4
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm5
+; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm8
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512VL-NEXT: vpternlogq $226, %ymm5, %ymm9, %ymm8
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT: vpand %ymm9, %ymm4, %ymm4
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm8, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm5
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm10
-; AVX512VL-NEXT: vpor %ymm4, %ymm10, %ymm4
+; AVX512VL-NEXT: vpor %ymm5, %ymm10, %ymm5
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpandn %ymm3, %ymm5, %ymm3
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
-; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT: vpsubb %ymm1, %ymm6, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm5
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm5
+; AVX512VL-NEXT: vpsubb %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT: vpand %ymm7, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT: vpandn %ymm3, %ymm8, %ymm3
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
-; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm9, %ymm4
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand %ymm9, %ymm3, %ymm3
+; AVX512VL-NEXT: vpand %ymm8, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm4, %ymm5, %ymm4
-; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm6
-; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
-; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
+; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm6, %ymm5
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; AVX512VL-NEXT: vpandn %ymm4, %ymm6, %ymm4
-; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm7
-; AVX512VL-NEXT: vpand %ymm6, %ymm7, %ymm7
-; AVX512VL-NEXT: vpor %ymm4, %ymm7, %ymm4
+; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm7, %ymm5
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm8
; AVX512VL-NEXT: vpor %ymm4, %ymm8, %ymm4
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpandn %ymm3, %ymm5, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
-; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm4
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT: vpandn %ymm3, %ymm6, %ymm3
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
-; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm7, %ymm4
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand %ymm7, %ymm3, %ymm3
+; AVX512VL-NEXT: vpand %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39]
; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpandn %ymm4, %ymm3, %ymm4
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm3, %ymm0
; AVX512VL-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq