From: Craig Topper <craig.topper@gmail.com>
Date: Sun, 8 May 2016 23:43:17 +0000 (+0000)
Subject: [AVX512] Add non-temporal store patterns for v16i32/v32i16/v64i8.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=84a0ca551ea48b662283f12bb1c79c8d9a8d8b5c;p=llvm

[AVX512] Add non-temporal store patterns for v16i32/v32i16/v64i8.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268889 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 2115ec2ce81..047b3c30a4e 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -3214,6 +3214,15 @@ defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
 
+let Predicates = [HasAVX512], AddedComplexity = 400 in {
+  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+}
+
 let Predicates = [HasVLX], AddedComplexity = 400 in {
   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll
index bf57d021aca..adfaef25b7d 100644
--- a/test/CodeGen/X86/avx512-nontemporal.ll
+++ b/test/CodeGen/X86/avx512-nontemporal.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx512f,+avx512bw | FileCheck %s
 
-define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
+define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, <8 x i64> %E, <8 x i64> %EE, <16 x i32> %F, <16 x i32> %FF, <32 x i16> %G, <32 x i16> %GG, <64 x i8> %H, <64 x i8> %HH) {
 ; CHECK: vmovntps %z
   %cast = bitcast i8* %B to <16 x float>*
   %A2 = fadd <16 x float> %A, %AA
@@ -13,6 +13,18 @@ define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x
   %cast2 = bitcast i8* %B to <8 x double>*
   %C2 = fadd <8 x double> %C, %CC
   store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast3 = bitcast i8* %B to <16 x i32>*
+  %F2 = add <16 x i32> %F, %FF
+  store <16 x i32> %F2, <16 x i32>* %cast3, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast4 = bitcast i8* %B to <32 x i16>*
+  %G2 = add <32 x i16> %G, %GG
+  store <32 x i16> %G2, <32 x i16>* %cast4, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast5 = bitcast i8* %B to <64 x i8>*
+  %H2 = add <64 x i8> %H, %HH
+  store <64 x i8> %H2, <64 x i8>* %cast5, align 64, !nontemporal !0
   ret void
 }