From: Craig Topper
Date: Sun, 8 May 2016 23:43:17 +0000 (+0000)
Subject: [AVX512] Add non-temporal store patterns for v16i32/v32i16/v64i8.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=84a0ca551ea48b662283f12bb1c79c8d9a8d8b5c;p=llvm

[AVX512] Add non-temporal store patterns for v16i32/v32i16/v64i8.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268889 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 2115ec2ce81..047b3c30a4e 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -3214,6 +3214,15 @@ defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
 
+let Predicates = [HasAVX512], AddedComplexity = 400 in {
+  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
+            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
+}
+
 let Predicates = [HasVLX], AddedComplexity = 400 in {
   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll
index bf57d021aca..adfaef25b7d 100644
--- a/test/CodeGen/X86/avx512-nontemporal.ll
+++ b/test/CodeGen/X86/avx512-nontemporal.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx512f,+avx512bw | FileCheck %s
 
-define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
+define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, <8 x i64> %E, <8 x i64> %EE, <16 x i32> %F, <16 x i32> %FF, <32 x i16> %G, <32 x i16> %GG, <64 x i8> %H, <64 x i8> %HH) {
 ; CHECK: vmovntps %z
   %cast = bitcast i8* %B to <16 x float>*
   %A2 = fadd <16 x float> %A, %AA
@@ -13,6 +13,18 @@ define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x
   %cast2 = bitcast i8* %B to <8 x double>*
   %C2 = fadd <8 x double> %C, %CC
   store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast3 = bitcast i8* %B to <16 x i32>*
+  %F2 = add <16 x i32> %F, %FF
+  store <16 x i32> %F2, <16 x i32>* %cast3, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast4 = bitcast i8* %B to <32 x i16>*
+  %G2 = add <32 x i16> %G, %GG
+  store <32 x i16> %G2, <32 x i16>* %cast4, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast5 = bitcast i8* %B to <64 x i8>*
+  %H2 = add <64 x i8> %H, %HH
+  store <64 x i8> %H2, <64 x i8>* %cast5, align 64, !nontemporal !0
   ret void
 }
 