From 9ca276657af51f753669cb6c4030159356258dc1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 19 Feb 2017 08:03:26 +0000 Subject: [PATCH] [AVX-512] Add broadcast VPTERNLOG instructions to special case commuting switch. The instructions are marked commutable, but without special handling the immediate is not updated correctly when the operands are commuted. While here, also remove the merge-masked (rmik) memory forms, which aren't commutable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 50 +++++++++++++++----- test/CodeGen/X86/avx512-vpternlog-commute.ll | 8 ++-- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 4efcfada427..15325156b16 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -5292,18 +5292,30 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: - case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik: - case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik: - case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik: - case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik: - case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik: - case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik: + case X86::VPTERNLOGDZrrik: + case X86::VPTERNLOGDZ128rrik: + case X86::VPTERNLOGDZ256rrik: + case X86::VPTERNLOGQZrrik: + case X86::VPTERNLOGQZ128rrik: + case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: - case 
X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: { + case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: + case X86::VPTERNLOGDZ128rmbi: + case X86::VPTERNLOGDZ256rmbi: + case X86::VPTERNLOGDZrmbi: + case X86::VPTERNLOGQZ128rmbi: + case X86::VPTERNLOGQZ256rmbi: + case X86::VPTERNLOGQZrmbi: + case X86::VPTERNLOGDZ128rmbikz: + case X86::VPTERNLOGDZ256rmbikz: + case X86::VPTERNLOGDZrmbikz: + case X86::VPTERNLOGQZ128rmbikz: + case X86::VPTERNLOGQZ256rmbikz: + case X86::VPTERNLOGQZrmbikz: { auto &WorkingMI = cloneIfNew(MI); if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2)) return nullptr; @@ -5484,18 +5496,30 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: - case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik: - case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik: - case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik: - case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik: - case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik: - case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik: + case X86::VPTERNLOGDZrrik: + case X86::VPTERNLOGDZ128rrik: + case X86::VPTERNLOGDZ256rrik: + case X86::VPTERNLOGQZrrik: + case X86::VPTERNLOGQZ128rrik: + case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: + case X86::VPTERNLOGDZ128rmbi: + case X86::VPTERNLOGDZ256rmbi: + case X86::VPTERNLOGDZrmbi: + case X86::VPTERNLOGQZ128rmbi: + case X86::VPTERNLOGQZ256rmbi: + case X86::VPTERNLOGQZrmbi: + case 
X86::VPTERNLOGDZ128rmbikz: + case X86::VPTERNLOGDZ256rmbikz: + case X86::VPTERNLOGDZrmbikz: + case X86::VPTERNLOGQZ128rmbikz: + case X86::VPTERNLOGQZ256rmbikz: + case X86::VPTERNLOGQZrmbikz: return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); default: const X86InstrFMA3Group *FMA3Group = diff --git a/test/CodeGen/X86/avx512-vpternlog-commute.ll b/test/CodeGen/X86/avx512-vpternlog-commute.ll index 1fa808b619f..3ea1a81d9ba 100644 --- a/test/CodeGen/X86/avx512-vpternlog-commute.ll +++ b/test/CodeGen/X86/avx512-vpternlog-commute.ll @@ -559,7 +559,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1, define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) { ; CHECK-LABEL: vpternlog_v16i32_102_broadcast2: ; CHECK: ## BB#0: -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 ; CHECK-NEXT: retq %x2_scalar = load i32, i32* %ptr_x2 %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0 @@ -571,7 +571,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x define <16 x i32> @vpternlog_v16i32_210_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) { ; CHECK-LABEL: vpternlog_v16i32_210_broadcast0: ; CHECK: ## BB#0: -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 ; CHECK-NEXT: retq %x0_scalar = load i32, i32* %ptr_x0 %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0 @@ -851,7 +851,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast2_maskz(<16 x i32> %x0, <16 x i ; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_maskz: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %x2scalar = load i32, i32* %x2ptr %vecinit.i = insertelement <16 x i32> undef, i32 
%x2scalar, i32 0 @@ -864,7 +864,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast0_maskz(i32* %x0ptr, <16 x i32> ; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_maskz: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %x0scalar = load i32, i32* %x0ptr %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0 -- 2.50.1