From: Craig Topper Date: Fri, 21 Jul 2017 00:40:42 +0000 (+0000) Subject: [AVX-512] Fix a bug that prevented some non-temporal loads from using the movntdqa... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6bf1d9e47ccbaba32626e2261860fc225e51c17d;p=llvm [AVX-512] Fix a bug that prevented some non-temporal loads from using the movntdqa instruction. The bitconverts here had an input type of 128 bits and an output type of 256 bits. The input type should also have been 256 bits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308702 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 705d0f7a5cf..ccfe7ce615d 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4328,11 +4328,11 @@ let Predicates = [HasVLX], AddedComplexity = 400 in { (VMOVNTDQAZ256rm addr:$src)>; def : Pat<(v4i64 (alignednontemporalload addr:$src)), (VMOVNTDQAZ256rm addr:$src)>; - def : Pat<(v8i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))), + def : Pat<(v8i32 (bitconvert (v4i64 (alignednontemporalload addr:$src)))), (VMOVNTDQAZ256rm addr:$src)>; - def : Pat<(v16i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))), + def : Pat<(v16i16 (bitconvert (v4i64 (alignednontemporalload addr:$src)))), (VMOVNTDQAZ256rm addr:$src)>; - def : Pat<(v32i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))), + def : Pat<(v32i8 (bitconvert (v4i64 (alignednontemporalload addr:$src)))), (VMOVNTDQAZ256rm addr:$src)>; def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst), diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll index 3c916fd38c6..a9e42ad5f7b 100644 --- a/test/CodeGen/X86/nontemporal-loads.ll +++ b/test/CodeGen/X86/nontemporal-loads.ll @@ -211,20 +211,10 @@ define <8 x i32> @test_v8i32(<8 x i32>* %src) { ; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: test_v8i32: -; 
AVX512F: # BB#0: -; AVX512F-NEXT: vmovntdqa (%rdi), %ymm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: test_v8i32: -; AVX512BW: # BB#0: -; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm0 -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v8i32: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovaps (%rdi), %ymm0 -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v8i32: +; AVX512: # BB#0: +; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 +; AVX512-NEXT: retq %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1 ret <8 x i32> %1 } @@ -876,22 +866,11 @@ define <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) { ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: test_arg_v8i32: -; AVX512F: # BB#0: -; AVX512F-NEXT: vmovntdqa (%rdi), %ymm1 -; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: test_arg_v8i32: -; AVX512BW: # BB#0: -; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_arg_v8i32: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_arg_v8i32: +; AVX512: # BB#0: +; AVX512-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1 %2 = add <8 x i32> %arg, %1 ret <8 x i32> %2