From 4c1b9a2ab6c7fc7cc325b8096fad3dd0f38d4b51 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 9 Nov 2016 05:31:57 +0000 Subject: [PATCH] [AVX-512] Use alignedstore256 in patterns that look for stores of the lower 256-bits of a 512-bit vector to use a 256-bit aligned store. Previously we were only checking for 16 byte alignment instead of 32 byte alignment. Fixes PR30947. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286342 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 20 ++++++++++---------- test/CodeGen/X86/avx512-extract-subvector.ll | 10 +++++----- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index a5f73108003..7fafa207c4c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3088,23 +3088,23 @@ let Predicates = [HasVLX] in { // Special patterns for storing subvector extracts of lower 256-bits of 512. // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr - def : Pat<(alignedstore (v4f64 (extract_subvector - (v8f64 VR512:$src), (iPTR 0))), addr:$dst), + def : Pat<(alignedstore256 (v4f64 (extract_subvector + (v8f64 VR512:$src), (iPTR 0))), addr:$dst), (VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; def : Pat<(alignedstore (v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))), addr:$dst), (VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; - def : Pat<(alignedstore (v4i64 (extract_subvector - (v8i64 VR512:$src), (iPTR 0))), addr:$dst), + def : Pat<(alignedstore256 (v4i64 (extract_subvector + (v8i64 VR512:$src), (iPTR 0))), addr:$dst), (VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; - def : Pat<(alignedstore (v8i32 (extract_subvector - (v16i32 VR512:$src), (iPTR 0))), addr:$dst), + def : Pat<(alignedstore256 (v8i32 (extract_subvector + (v16i32 VR512:$src), (iPTR 0))), addr:$dst), (VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; - def : Pat<(alignedstore (v16i16 (extract_subvector - (v32i16 VR512:$src), (iPTR 0))), addr:$dst), + def : Pat<(alignedstore256 (v16i16 (extract_subvector + (v32i16 VR512:$src), (iPTR 0))), addr:$dst), (VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; - def : Pat<(alignedstore (v32i8 (extract_subvector - (v64i8 VR512:$src), (iPTR 0))), addr:$dst), + def : Pat<(alignedstore256 (v32i8 (extract_subvector + (v64i8 VR512:$src), (iPTR 0))), addr:$dst), (VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; def : Pat<(store (v4f64 (extract_subvector diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll index 61f8299983f..8ea9b532cd7 100644 --- a/test/CodeGen/X86/avx512-extract-subvector.ll +++ b/test/CodeGen/X86/avx512-extract-subvector.ll @@ -288,7 +288,7 @@ entry: define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vmovups %ymm0, (%rdi) ; SKX-NEXT: retq entry: %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> @@ -360,7 +360,7 @@ entry: define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vmovups %ymm0, (%rdi) ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> @@ -396,7 +396,7 @@ entry: define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vmovups %ymm0, (%rdi) ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> @@ -432,7 +432,7 @@ entry: define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vmovups %ymm0, (%rdi) ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> @@ -468,7 +468,7 @@ entry: define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vmovups %ymm0, (%rdi) ; SKX-NEXT: retq entry: %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> -- 2.40.0