From: Simon Pilgrim Date: Mon, 30 May 2016 17:55:25 +0000 (+0000) Subject: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7ad5fba5dc94e5f1d56d70ac2af484b5af0ec628;p=clang [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer According to the GCC headers, the Intel intrinsics docs and MSDN codegen, _mm_store1_pd (and its _mm_store_pd1 equivalent) should use an aligned pointer - the Clang headers are the only implementation I can find that assumes non-aligned stores (by storing with _mm_storeu_pd). Additionally, according to the Intel intrinsics docs and MSDN codegen, _mm_store1_ps (_mm_store_ps1) requires a similarly aligned pointer. This patch raises the alignment requirements to match the other implementations by calling _mm_store_ps/_mm_store_pd instead. I've also added the missing _mm_store_pd1 intrinsic (which maps to _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps). As a follow-up, I'll update the LLVM fast-isel tests to match this codegen. 
Differential Revision: http://reviews.llvm.org/D20617 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@271218 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index a78ec25ac3..08ee06b1c6 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -587,20 +587,23 @@ _mm_store_sd(double *__dp, __m128d __a) ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; } +static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d*)__dp = __a; +} + static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { - struct __mm_store1_pd_struct { - double __u[2]; - } __attribute__((__packed__, __may_alias__)); - ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; - ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; + __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); + _mm_store_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { - *(__m128d *)__dp = __a; + return _mm_store1_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h index 1ae47e1a96..2a048c880f 100644 --- a/lib/Headers/xmmintrin.h +++ b/lib/Headers/xmmintrin.h @@ -1593,22 +1593,22 @@ _mm_storeu_ps(float *__p, __m128 __a) } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store1_ps(float *__p, __m128 __a) +_mm_store_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); - _mm_storeu_ps(__p, __a); + *(__m128*)__p = __a; } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps1(float *__p, __m128 __a) +_mm_store1_ps(float *__p, __m128 __a) { - return _mm_store1_ps(__p, __a); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); + _mm_store_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps(float *__p, __m128 __a) +_mm_store_ps1(float 
*__p, __m128 __a) { - *(__m128 *)__p = __a; + return _mm_store1_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS diff --git a/test/CodeGen/sse2-builtins.c b/test/CodeGen/sse2-builtins.c index 467b4f12f8..de78d707c0 100644 --- a/test/CodeGen/sse2-builtins.c +++ b/test/CodeGen/sse2-builtins.c @@ -1205,6 +1205,13 @@ void test_mm_store_pd(double* A, __m128d B) { _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store_si128(__m128i* A, __m128i B) { void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 _mm_store1_pd(x, y); }