From: Craig Topper Date: Thu, 31 May 2018 05:02:08 +0000 (+0000) Subject: [X86] Make 512-bit unmasked load/store builtins more like their 128/256-bit equivalents. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ae5140072b20e79fa8d575b5e11813adf581ea52;p=clang [X86] Make 512-bit unmasked load/store builtins more like their 128/256-bit equivalents. Previously we were just passing a -1 mask to the masked builtin. This changes it to the more generic approach that the 128/256-bit versions use. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@333626 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 317cf215d6..5c5bc6c5a8 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -4590,10 +4590,10 @@ _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS _mm512_loadu_si512 (void const *__P) { - return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + struct __loadu_si512 { + __m512i __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_si512*)__P)->__v; } static __inline __m512i __DEFAULT_FN_ATTRS @@ -4686,10 +4686,7 @@ _mm512_loadu_ps(void const *__p) static __inline __m512 __DEFAULT_FN_ATTRS _mm512_load_ps(void const *__p) { - return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1); + return *(__m512*)__p; } static __inline __m512 __DEFAULT_FN_ATTRS @@ -4712,10 +4709,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void const *__P) static __inline __m512d __DEFAULT_FN_ATTRS _mm512_load_pd(void const *__p) { - return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1); + return *(__m512d*)__p; } static __inline __m512d __DEFAULT_FN_ATTRS @@ -4765,8 +4759,10 @@ _mm512_mask_storeu_epi64(void *__P, 
__mmask8 __U, __m512i __A) static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_si512 (void *__P, __m512i __A) { - __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, - (__mmask16) -1); + struct __storeu_si512 { + __m512i __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_si512*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS @@ -4785,7 +4781,10 @@ _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_pd(void *__P, __m512d __A) { - __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1); + struct __storeu_pd { + __m512d __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pd*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS @@ -4798,7 +4797,10 @@ _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_ps(void *__P, __m512 __A) { - __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1); + struct __storeu_ps { + __m512 __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_ps*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index d919949308..e58c29b864 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -159,7 +159,7 @@ __m512d test_mm512_mul_pd(__m512d a, __m512d b) void test_mm512_storeu_si512 (void *__P, __m512i __A) { // CHECK-LABEL: @test_mm512_storeu_si512 - // CHECK: store <16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, align 1{{$}} + // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 1{{$}} // CHECK-NEXT: ret void _mm512_storeu_si512 ( __P,__A); } @@ -253,7 +253,7 @@ void test_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) { __m512i test_mm512_loadu_si512 (void *__P) { // CHECK-LABEL: @test_mm512_loadu_si512 - // CHECK: load <16 x i32>, <16 x i32>* %{{.*}}, align 1{{$}} + // 
CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 1{{$}} return _mm512_loadu_si512 ( __P); }