From e91c6128e6a243d405e5eddd46ea5f10d11215b9 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 5 Feb 2017 22:25:40 +0000
Subject: [PATCH] [AVX-512] Add masked scalar FMA intrinsics to
 isNonFoldablePartialRegisterLoad to improve load folding of scalar loads.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294151 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp       | 24 ++++++++++++++++++++++++
 test/CodeGen/X86/avx512-intrinsics.ll | 12 ++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 8750880ba95..ced83c140de 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -7740,6 +7740,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
     case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
     case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
+    case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
+    case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
+    case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
+    case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
+    case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
+    case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
+    case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
+    case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
+    case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
+    case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
+    case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
+    case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
       return false;
     default:
       return true;
@@ -7773,6 +7785,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
     case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
     case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
+    case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
+    case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
+    case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
+    case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
+    case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
+    case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
+    case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
+    case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
+    case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
+    case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
+    case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
+    case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
       return false;
     default:
       return true;
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 916fd3af51a..fbd49a3258e 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4843,10 +4843,9 @@ define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_ss_mask_memfold:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    andl $1, %edx
 ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vfmadd213ss %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT:    vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1}
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %a.val = load float, float* %a
@@ -4872,10 +4871,9 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_ss_maskz_memfold:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    andl $1, %edx
 ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vfmadd213ss %xmm0, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1} {z}
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %a.val = load float, float* %a
@@ -4901,10 +4899,9 @@ define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_sd_mask_memfold:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; CHECK-NEXT:    andl $1, %edx
 ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vfmadd213sd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT:    vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1}
 ; CHECK-NEXT:    vmovlps %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %a.val = load double, double* %a
@@ -4926,10 +4923,9 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_sd_maskz_memfold:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; CHECK-NEXT:    andl $1, %edx
 ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vfmadd213sd %xmm0, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1} {z}
 ; CHECK-NEXT:    vmovlps %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %a.val = load double, double* %a
--
2.50.1
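
Note (not part of the patch itself): below is a minimal standalone sketch of the
rule the switch above implements. The reduced opcode enum and the driver are
hypothetical stand-ins, not LLVM's real API. The idea: a scalar load such as
VMOVSSZrm defines only the low element of its destination register, so folding
it into a consuming instruction is safe only when that instruction reads just
element 0. The *_Int forms, and with this patch the masked *_Intk and
zero-masked *_Intkz forms, return false ("not non-foldable", i.e. foldable);
everything else conservatively returns true.

#include <cstdio>

// Hypothetical reduced opcode set standing in for the X86 opcodes above.
enum Opcode {
  VFMADD213SSZr_Int,    // unmasked scalar FMA: reads only element 0
  VFMADD213SSZr_Intk,   // merge-masked form, whitelisted by this patch
  VFMADD213SSZr_Intkz,  // zero-masked form, whitelisted by this patch
  VADDPSZrr             // packed op: reads the whole register
};

// Mirrors the shape of isNonFoldablePartialRegisterLoad: true means the
// partial (scalar) load must NOT be folded into this user instruction.
static bool isNonFoldablePartialRegisterLoadSketch(Opcode UserOpc) {
  switch (UserOpc) {
  case VFMADD213SSZr_Int:
  case VFMADD213SSZr_Intk:
  case VFMADD213SSZr_Intkz:
    return false; // only element 0 is read, so folding the load is safe
  default:
    return true;  // may read elements the scalar load did not define
  }
}

int main() {
  std::printf("fold into VFMADD213SSZr_Intk? %s\n",
              isNonFoldablePartialRegisterLoadSketch(VFMADD213SSZr_Intk)
                  ? "no" : "yes");
  std::printf("fold into VADDPSZrr?          %s\n",
              isNonFoldablePartialRegisterLoadSketch(VADDPSZrr)
                  ? "no" : "yes");
  return 0;
}

This is exactly the effect visible in the test diffs above: once the masked
forms return false, the second vmovss/vmovsd disappears and the load is folded
directly into a vfmadd132ss/vfmadd132sd memory operand.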