From ab6082193d0ec451b0005f5c47cc776c029f1b8f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 29 Sep 2019 01:24:22 +0000 Subject: [PATCH] [X86] Enable canonicalizeBitSelect for AVX512 since we can use VPTERNLOG now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373155 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 12 ++-- test/CodeGen/X86/combine-bitselect.ll | 20 ++---- test/CodeGen/X86/vec-copysign-avx512.ll | 94 +++++++------------------ test/CodeGen/X86/vector-fshl-128.ll | 55 ++++++++++++--- test/CodeGen/X86/vector-fshl-256.ll | 55 ++++++++++++--- test/CodeGen/X86/vector-fshl-512.ll | 16 ++--- test/CodeGen/X86/vector-fshl-rot-128.ll | 39 +++++++--- test/CodeGen/X86/vector-fshl-rot-256.ll | 51 +++++++++----- test/CodeGen/X86/vector-fshl-rot-512.ll | 8 +-- test/CodeGen/X86/vector-fshr-128.ll | 55 ++++++++++++--- test/CodeGen/X86/vector-fshr-256.ll | 55 ++++++++++++--- test/CodeGen/X86/vector-fshr-512.ll | 16 ++--- test/CodeGen/X86/vector-fshr-rot-128.ll | 39 +++++++--- test/CodeGen/X86/vector-fshr-rot-256.ll | 55 ++++++++++----- test/CodeGen/X86/vector-fshr-rot-512.ll | 8 +-- test/CodeGen/X86/vector-rotate-128.ll | 39 +++++++--- test/CodeGen/X86/vector-rotate-256.ll | 24 ++----- test/CodeGen/X86/vector-rotate-512.ll | 8 +-- 18 files changed, 413 insertions(+), 236 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 521fc3cd37b..daba879c7fc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -39582,7 +39582,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); - EVT VT = N->getValueType(0); + MVT VT = N->getSimpleValueType(0); if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0) return SDValue(); @@ -39591,10 +39591,12 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) return SDValue(); - // On XOP we'll lower to PCMOV so accept one use, otherwise only - // do this if either mask has multiple uses already. - if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() || - !N1.getOperand(1).hasOneUse())) + // On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use + // VPTERNLOG. Otherwise only do this if either mask has multiple uses already. + bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) || + Subtarget.hasVLX(); + if (!(Subtarget.hasXOP() || UseVPTERNLOG || + !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse())) return SDValue(); // Attempt to extract constant byte masks. 
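
Note on the pattern (a sketch, not part of the patch): canonicalizeBitSelect matches an OR of two ANDs whose constant masks are bitwise complements, i.e. a per-bit select (X & M) | (Y & ~M). With this change it fires for 512-bit vectors on AVX512, or for any legal width with VLX, since the whole select can become one VPTERNLOG. A minimal hypothetical IR example (the function name and mask values are illustrative; -3 is the bitwise complement of 2):

define <8 x i64> @bitselect_sketch(<8 x i64> %x, <8 x i64> %y) {
  ; Complementary splat masks make this a per-bit select between %x and %y.
  %a = and <8 x i64> %x, <i64 -3, i64 -3, i64 -3, i64 -3, i64 -3, i64 -3, i64 -3, i64 -3>
  %b = and <8 x i64> %y, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  %r = or <8 x i64> %a, %b
  ret <8 x i64> %r
}

On an AVX512F target this should now lower to a single vpternlogq plus the mask load, as in the bitselect_v8i64_rr test below, instead of vpandq + vpandq + vporq.
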
diff --git a/test/CodeGen/X86/combine-bitselect.ll b/test/CodeGen/X86/combine-bitselect.ll index 8cb6a4dca09..743cde84317 100644 --- a/test/CodeGen/X86/combine-bitselect.ll +++ b/test/CodeGen/X86/combine-bitselect.ll @@ -329,9 +329,7 @@ define <8 x i64> @bitselect_v8i64_rr(<8 x i64>, <8 x i64>) { ; ; AVX512F-LABEL: bitselect_v8i64_rr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 -; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq %3 = and <8 x i64> %0, %4 = and <8 x i64> %1, @@ -405,9 +403,7 @@ define <8 x i64> @bitselect_v8i64_rm(<8 x i64>, <8 x i64>* nocapture readonly) { ; AVX512F-LABEL: bitselect_v8i64_rm: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 -; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq %3 = load <8 x i64>, <8 x i64>* %1 %4 = and <8 x i64> %0, @@ -482,9 +478,7 @@ define <8 x i64> @bitselect_v8i64_mr(<8 x i64>* nocapture readonly, <8 x i64>) { ; AVX512F-LABEL: bitselect_v8i64_mr: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq %3 = load <8 x i64>, <8 x i64>* %0 %4 = and <8 x i64> %3, @@ -554,11 +548,9 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n ; ; AVX512F-LABEL: bitselect_v8i64_mm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm1 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 -; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1 +; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm0 +; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq %3 = load <8 x i64>, <8 x i64>* %0 %4 = load <8 x i64>, <8 x i64>* %1 diff --git a/test/CodeGen/X86/vec-copysign-avx512.ll b/test/CodeGen/X86/vec-copysign-avx512.ll index b08b15ce004..13e2e12c928 100644 --- a/test/CodeGen/X86/vec-copysign-avx512.ll +++ b/test/CodeGen/X86/vec-copysign-avx512.ll @@ -3,55 +3,31 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.10.0 -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VLDQ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind { -; AVX512VL-LABEL: v4f32: -; AVX512VL: ## %bb.0: -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1 -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512VLDQ-LABEL: v4f32: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm1, %xmm1 -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vorps %xmm1, %xmm0, %xmm0 -; AVX512VLDQ-NEXT: retq +; CHECK-LABEL: v4f32: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq %tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b ) ret <4 x float> %tmp } define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind { -; AVX512VL-LABEL: v8f32: -; AVX512VL: ## %bb.0: -; 
AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1 -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq -; -; AVX512VLDQ-LABEL: v8f32: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vorps %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: retq +; CHECK-LABEL: v8f32: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq %tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b ) ret <8 x float> %tmp } define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind { -; AVX512VL-LABEL: v16f32: -; AVX512VL: ## %bb.0: -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1 -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512VL-NEXT: retq -; -; AVX512VLDQ-LABEL: v16f32: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm1, %zmm1 -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; AVX512VLDQ-NEXT: vorps %zmm1, %zmm0, %zmm0 -; AVX512VLDQ-NEXT: retq +; CHECK-LABEL: v16f32: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: retq %tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b ) ret <16 x float> %tmp } @@ -59,46 +35,28 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind { define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind { ; CHECK-LABEL: v2f64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 ; CHECK-NEXT: retq %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b ) ret <2 x double> %tmp } define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind { -; AVX512VL-LABEL: v4f64: -; AVX512VL: ## %bb.0: -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1 -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq -; -; AVX512VLDQ-LABEL: v4f64: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vorpd %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: retq +; CHECK-LABEL: v4f64: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq %tmp = tail call <4 x double> @llvm.copysign.v4f64( <4 x double> %a, <4 x double> %b ) ret <4 x double> %tmp } define <8 x double> @v8f64(<8 x double> %a, <8 x double> %b) nounwind { -; AVX512VL-LABEL: v8f64: -; AVX512VL: ## %bb.0: -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1 -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512VL-NEXT: retq -; -; AVX512VLDQ-LABEL: v8f64: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm1, %zmm1 -; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512VLDQ-NEXT: vorpd %zmm1, %zmm0, %zmm0 -; AVX512VLDQ-NEXT: retq +; 
CHECK-LABEL: v8f64: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: retq %tmp = tail call <8 x double> @llvm.copysign.v8f64( <8 x double> %a, <8 x double> %b ) ret <8 x double> %tmp } diff --git a/test/CodeGen/X86/vector-fshl-128.ll b/test/CodeGen/X86/vector-fshl-128.ll index b8bbc5b2bd5..5530b9920d4 100644 --- a/test/CodeGen/X86/vector-fshl-128.ll +++ b/test/CodeGen/X86/vector-fshl-128.ll @@ -2980,14 +2980,53 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: ; XOP: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshl-256.ll b/test/CodeGen/X86/vector-fshl-256.ll index f969aef6c98..ed5ebcde68e 100644 --- a/test/CodeGen/X86/vector-fshl-256.ll +++ b/test/CodeGen/X86/vector-fshl-256.ll @@ -2503,14 +2503,53 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v32i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v32i8: +; AVX512F: # %bb.0: +; 
AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v32i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v32i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: ; XOPAVX1: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshl-512.ll b/test/CodeGen/X86/vector-fshl-512.ll index ebaf22a6019..0f5558d7d5f 100644 --- a/test/CodeGen/X86/vector-fshl-512.ll +++ b/test/CodeGen/X86/vector-fshl-512.ll @@ -1560,37 +1560,29 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; 
AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> ) ret <64 x i8> %res diff --git a/test/CodeGen/X86/vector-fshl-rot-128.ll b/test/CodeGen/X86/vector-fshl-rot-128.ll index 706cdf016f0..ff56dddd383 100644 --- a/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1835,14 +1835,37 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind { ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: ; XOP: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshl-rot-256.ll b/test/CodeGen/X86/vector-fshl-rot-256.ll index eb4304e3845..be141995329 100644 --- a/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -437,19 +437,15 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { ; AVX512VL-LABEL: var_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 @@ -1497,14 +1493,37 @@ define <32 x 
i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v32i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v32i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v32i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v32i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: ; XOPAVX1: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshl-rot-512.ll b/test/CodeGen/X86/vector-fshl-rot-512.ll index 540ad19a6ec..94c0be32bc8 100644 --- a/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -830,19 +830,15 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind { ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> ) ret <64 x i8> %res diff --git a/test/CodeGen/X86/vector-fshr-128.ll b/test/CodeGen/X86/vector-fshr-128.ll index d1ac70aa7cc..253826976db 100644 --- a/test/CodeGen/X86/vector-fshr-128.ll +++ b/test/CodeGen/X86/vector-fshr-128.ll @@ -3001,14 +3001,53 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v16i8: +; 
AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 +; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: ; XOP: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshr-256.ll b/test/CodeGen/X86/vector-fshr-256.ll index 9f74d39077a..918270dc668 100644 --- a/test/CodeGen/X86/vector-fshr-256.ll +++ b/test/CodeGen/X86/vector-fshr-256.ll @@ -2504,14 +2504,53 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v32i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v32i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v32i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v32i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 +; 
AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: ; XOPAVX1: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshr-512.ll b/test/CodeGen/X86/vector-fshr-512.ll index 7c20ec8f4f7..748aa84974d 100644 --- a/test/CodeGen/X86/vector-fshr-512.ll +++ b/test/CodeGen/X86/vector-fshr-512.ll @@ -1544,37 +1544,29 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> ) ret <64 x i8> %res diff --git a/test/CodeGen/X86/vector-fshr-rot-128.ll b/test/CodeGen/X86/vector-fshr-rot-128.ll index 7e3d7fc6cf8..f3918daae5e 100644 --- a/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1917,14 +1917,37 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind { ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1 +; 
AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: ; XOP: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshr-rot-256.ll b/test/CodeGen/X86/vector-fshr-rot-256.ll index f0c9d1e37cf..4f940f464b3 100644 --- a/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -476,21 +476,17 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { ; AVX512VL-LABEL: var_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2 -; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512VL-NEXT: vpsubb %ymm1, %ymm3, %ymm1 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 @@ -1575,14 +1571,37 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v32i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v32i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v32i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 +; 
AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v32i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: ; XOPAVX1: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshr-rot-512.ll b/test/CodeGen/X86/vector-fshr-rot-512.ll index 4e39a5a7dda..33b681861aa 100644 --- a/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -850,19 +850,15 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind { ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> ) ret <64 x i8> %res diff --git a/test/CodeGen/X86/vector-rotate-128.ll b/test/CodeGen/X86/vector-rotate-128.ll index e9802e2586b..3acdca7cda5 100644 --- a/test/CodeGen/X86/vector-rotate-128.ll +++ b/test/CodeGen/X86/vector-rotate-128.ll @@ -1801,14 +1801,37 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind { ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_rotate_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_rotate_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_rotate_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_rotate_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: 
splatconstant_rotate_v16i8: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: retq ; ; XOP-LABEL: splatconstant_rotate_v16i8: ; XOP: # %bb.0: diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll index 9478db6c4ae..1b7555cebdf 100644 --- a/test/CodeGen/X86/vector-rotate-256.ll +++ b/test/CodeGen/X86/vector-rotate-256.ll @@ -433,19 +433,15 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; AVX512VL-LABEL: var_rotate_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 @@ -1510,10 +1506,8 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind { ; AVX512VL-LABEL: splatconstant_rotate_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v32i8: @@ -1528,10 +1522,8 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind { ; AVX512VLBW-LABEL: splatconstant_rotate_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_rotate_v32i8: @@ -1796,10 +1788,8 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind { ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; diff --git a/test/CodeGen/X86/vector-rotate-512.ll b/test/CodeGen/X86/vector-rotate-512.ll index 2e03525dd7a..d92d73a220d 100644 --- a/test/CodeGen/X86/vector-rotate-512.ll +++ b/test/CodeGen/X86/vector-rotate-512.ll @@ -819,19 +819,15 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x 
i8> %a) nounwind { ; AVX512BW-LABEL: splatconstant_rotate_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq %shl = shl <64 x i8> %a, %lshr = lshr <64 x i8> %a, -- 2.40.0
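
For reference (a worked note, not part of the commit): a VPTERNLOG immediate is the 8-bit truth table of the ternary bitwise function, with the result bit taken from imm8[(A<<2)|(B<<1)|C], where A is the first source (which is also the destination), B the second source, and C the third. For the select (A & C) | (B & ~C) the function is 1 exactly at the indices abc = 111, 110, 101, 010, so imm8 = 0b11100100 = 0xE4 = 228, the immediate in most of the funnel-shift tests above, where C holds the mask loaded from memory. The same recipe gives (B & C) | (A & ~C) = 0b11011000 = 216 for the operand order used in combine-bitselect.ll, and (A & B) | (C & ~B) = 0b11100010 = 226 for the copysign tests, where B is the broadcast sign-cleared mask 0x7fffffff (printed as NaN in the constant comments), so the result keeps the magnitude bits of A and the sign bit of C.
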