From: Craig Topper Date: Sun, 19 Feb 2017 08:03:26 +0000 (+0000) Subject: [AVX-512] Add broadcast VPTERNLOG instructions to special case commuting switch. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9ca276657af51f753669cb6c4030159356258dc1;p=llvm [AVX-512] Add broadcast VPTERNLOG instructions to special case commuting switch. The instructions are marked commutable, but without special handling we don't get the immediate correct. While here also remove the masked memory forms that aren't commutable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295602 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 4efcfada427..15325156b16 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -5292,18 +5292,30 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: - case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik: - case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik: - case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik: - case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik: - case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik: - case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik: + case X86::VPTERNLOGDZrrik: + case X86::VPTERNLOGDZ128rrik: + case X86::VPTERNLOGDZ256rrik: + case X86::VPTERNLOGQZrrik: + case X86::VPTERNLOGQZ128rrik: + case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: - case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: { + case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: + case X86::VPTERNLOGDZ128rmbi: + case X86::VPTERNLOGDZ256rmbi: + case X86::VPTERNLOGDZrmbi: + case X86::VPTERNLOGQZ128rmbi: + case X86::VPTERNLOGQZ256rmbi: + case X86::VPTERNLOGQZrmbi: + case X86::VPTERNLOGDZ128rmbikz: + case X86::VPTERNLOGDZ256rmbikz: + case X86::VPTERNLOGDZrmbikz: + case X86::VPTERNLOGQZ128rmbikz: + case X86::VPTERNLOGQZ256rmbikz: + case X86::VPTERNLOGQZrmbikz: { auto &WorkingMI = cloneIfNew(MI); if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2)) return nullptr; @@ -5484,18 +5496,30 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: - case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik: - case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik: - case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik: - case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik: - case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik: - case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik: + case X86::VPTERNLOGDZrrik: + case X86::VPTERNLOGDZ128rrik: + case X86::VPTERNLOGDZ256rrik: + case X86::VPTERNLOGQZrrik: + case X86::VPTERNLOGQZ128rrik: + case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: + case X86::VPTERNLOGDZ128rmbi: + case X86::VPTERNLOGDZ256rmbi: + case X86::VPTERNLOGDZrmbi: + case X86::VPTERNLOGQZ128rmbi: + case X86::VPTERNLOGQZ256rmbi: + case X86::VPTERNLOGQZrmbi: + case X86::VPTERNLOGDZ128rmbikz: + case X86::VPTERNLOGDZ256rmbikz: + case X86::VPTERNLOGDZrmbikz: + case X86::VPTERNLOGQZ128rmbikz: + case X86::VPTERNLOGQZ256rmbikz: + case X86::VPTERNLOGQZrmbikz: return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); default: const X86InstrFMA3Group *FMA3Group = diff --git a/test/CodeGen/X86/avx512-vpternlog-commute.ll b/test/CodeGen/X86/avx512-vpternlog-commute.ll index 1fa808b619f..3ea1a81d9ba 100644 --- a/test/CodeGen/X86/avx512-vpternlog-commute.ll +++ b/test/CodeGen/X86/avx512-vpternlog-commute.ll @@ -559,7 +559,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1, define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) { ; CHECK-LABEL: vpternlog_v16i32_102_broadcast2: ; CHECK: ## BB#0: -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 ; CHECK-NEXT: retq %x2_scalar = load i32, i32* %ptr_x2 %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0 @@ -571,7 +571,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x define <16 x i32> @vpternlog_v16i32_210_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) { ; CHECK-LABEL: vpternlog_v16i32_210_broadcast0: ; CHECK: ## BB#0: -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 ; CHECK-NEXT: retq %x0_scalar = load i32, i32* %ptr_x0 %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0 @@ -851,7 +851,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast2_maskz(<16 x i32> %x0, <16 x i ; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_maskz: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %x2scalar = load i32, i32* %x2ptr %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0 @@ -864,7 +864,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast0_maskz(i32* %x0ptr, <16 x i32> ; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_maskz: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %x0scalar = load i32, i32* %x0ptr %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0