[AVX512] Add non-temporal store patterns for v16i32/v32i16/v64i8.

author Craig Topper <craig.topper@gmail.com>

Sun, 8 May 2016 23:43:17 +0000 (23:43 +0000)

committer Craig Topper <craig.topper@gmail.com>

Sun, 8 May 2016 23:43:17 +0000 (23:43 +0000)
author Craig Topper <craig.topper@gmail.com>
Sun, 8 May 2016 23:43:17 +0000 (23:43 +0000)
committer Craig Topper <craig.topper@gmail.com>
Sun, 8 May 2016 23:43:17 +0000 (23:43 +0000)
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td

index 2115ec2ce812c1534a5f2e780176b1cd5cbddd30..047b3c30a4e919022ec769b40f3a1e43965e3c5d 100644 (file)
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -3214,6 +3214,15 @@ defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
  defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
  defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
  
+let Predicates = [HasAVX512], AddedComplexity = 400 in {
+  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+}
+
  let Predicates = [HasVLX], AddedComplexity = 400 in {
    def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
              (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll

index bf57d021acabb14ab6b3135c64e1412d4c4eaf1e..adfaef25b7d39c21ce9c78f0d5ef9bf7c1058763 100644 (file)
--- a/test/CodeGen/X86/avx512-nontemporal.ll
+++ b/test/CodeGen/X86/avx512-nontemporal.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx512f,+avx512bw | FileCheck %s
  
-define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
+define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, <8 x i64> %E, <8 x i64> %EE, <16 x i32> %F, <16 x i32> %FF, <32 x i16> %G, <32 x i16> %GG, <64 x i8> %H, <64 x i8> %HH) {
  ; CHECK: vmovntps %z
    %cast = bitcast i8* %B to <16 x float>*
    %A2 = fadd <16 x float> %A, %AA
@@ -13,6 +13,18 @@ define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x
    %cast2 = bitcast i8* %B to <8 x double>*
    %C2 = fadd <8 x double> %C, %CC
    store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast3 = bitcast i8* %B to <16 x i32>*
+  %F2 = add <16 x i32> %F, %FF
+  store <16 x i32> %F2, <16 x i32>* %cast3, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast4 = bitcast i8* %B to <32 x i16>*
+  %G2 = add <32 x i16> %G, %GG
+  store <32 x i16> %G2, <32 x i16>* %cast4, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast5 = bitcast i8* %B to <64 x i8>*
+  %H2 = add <64 x i8> %H, %HH
+  store <64 x i8> %H2, <64 x i8>* %cast5, align 64, !nontemporal !0
    ret void
  }
author	Craig Topper <craig.topper@gmail.com>
	Sun, 8 May 2016 23:43:17 +0000 (23:43 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Sun, 8 May 2016 23:43:17 +0000 (23:43 +0000)
lib/Target/X86/X86InstrAVX512.td		patch | blob | history
test/CodeGen/X86/avx512-nontemporal.ll		patch | blob | history