From 3dbd05388471318fa4b574cde57e28ea2cca92e3 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Fri, 16 Jan 2015 18:51:50 +0000 Subject: [PATCH] [AVX512] Add intrinsics for masked aligned FP loads and stores Part of git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@226298 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 4 +++ lib/Headers/avx512fintrin.h | 53 +++++++++++++++++++++++++++-- test/CodeGen/avx512f-builtins.c | 42 +++++++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index eb6803b141..545638ea7e 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -897,11 +897,15 @@ BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "") BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16ivC*V16iUs", "") BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLivC*V8LLiUc", "") BUILTIN(__builtin_ia32_loadups512_mask, "V16fvC*V16fUs", "") +BUILTIN(__builtin_ia32_loadaps512_mask, "V16fvC*V16fUs", "") BUILTIN(__builtin_ia32_loadupd512_mask, "V8dvC*V8dUc", "") +BUILTIN(__builtin_ia32_loadapd512_mask, "V8dvC*V8dUc", "") BUILTIN(__builtin_ia32_storedqudi512_mask, "vv*V8LLiUc", "") BUILTIN(__builtin_ia32_storedqusi512_mask, "vv*V16iUs", "") BUILTIN(__builtin_ia32_storeupd512_mask, "vv*V8dUc", "") +BUILTIN(__builtin_ia32_storeapd512_mask, "vv*V8dUc", "") BUILTIN(__builtin_ia32_storeups512_mask, "vv*V16fUs", "") +BUILTIN(__builtin_ia32_storeaps512_mask, "vv*V16fUs", "") BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs", "") BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "") BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "") diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 9c80710110..4b19590ebb 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -928,6 +928,24 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) (__mmask8) __U); } +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_load_ps(__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_load_pd(__mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + static __inline __m512d __attribute__((__always_inline__, __nodebug__)) _mm512_loadu_pd(double const *__p) { @@ -946,6 +964,24 @@ _mm512_loadu_ps(float const *__p) return ((struct __loadu_ps*)__p)->__v; } +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_load_ps(double const *__p) +{ + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_load_pd(float const *__p) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + /* SIMD store ops */ static __inline void __attribute__ ((__always_inline__, __nodebug__)) @@ -988,9 +1024,9 @@ _mm512_storeu_ps(void *__P, __m512 __A) } static __inline void __attribute__ ((__always_inline__, __nodebug__)) -_mm512_store_ps(void *__P, __m512 __A) +_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) { - *(__m512*)__P = __A; + __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); } static __inline void __attribute__ ((__always_inline__, __nodebug__)) @@ -999,6 +1035,19 @@ _mm512_store_pd(void *__P, __m512d __A) *(__m512d*)__P = __A; } +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, + (__mmask16) __U); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_store_ps(void *__P, __m512 __A) +{ + *(__m512*)__P = __A; +} + /* Mask ops */ static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 857274d741..d63f53d9cd 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -75,6 +75,13 @@ void test_mm512_storeu_pd(void *p, __m512d a) _mm512_storeu_pd(p, a); } +void test_mm512_mask_store_ps(void *p, __m512 a, __mmask16 m) +{ + // CHECK-LABEL: @test_mm512_mask_store_ps + // CHECK: @llvm.x86.avx512.mask.store.ps.512 + _mm512_mask_store_ps(p, m, a); +} + void test_mm512_store_ps(void *p, __m512 a) { // CHECK-LABEL: @test_mm512_store_ps @@ -82,6 +89,13 @@ void test_mm512_store_ps(void *p, __m512 a) _mm512_store_ps(p, a); } +void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m) +{ + // CHECK-LABEL: @test_mm512_mask_store_pd + // CHECK: @llvm.x86.avx512.mask.store.pd.512 + _mm512_mask_store_pd(p, m, a); +} + void test_mm512_store_pd(void *p, __m512d a) { // CHECK-LABEL: @test_mm512_store_pd @@ -103,6 +117,34 @@ __m512d test_mm512_loadu_pd(void *p) return _mm512_loadu_pd(p); } +__m512 test_mm512_maskz_load_ps(void *p, __mmask16 m) +{ + // CHECK-LABEL: @test_mm512_maskz_load_ps + // CHECK: @llvm.x86.avx512.mask.load.ps.512 + return _mm512_maskz_load_ps(m, p); +} + +__m512 test_mm512_load_ps(void *p) +{ + // CHECK-LABEL: @test_mm512_load_ps + // CHECK: @llvm.x86.avx512.mask.load.ps.512 + return _mm512_load_ps(p); +} + +__m512d test_mm512_maskz_load_pd(void *p, __mmask8 m) +{ + // CHECK-LABEL: @test_mm512_maskz_load_pd + // CHECK: @llvm.x86.avx512.mask.load.pd.512 + return _mm512_maskz_load_pd(m, p); +} + +__m512d test_mm512_load_pd(void *p) +{ + // CHECK-LABEL: @test_mm512_load_pd + // CHECK: @llvm.x86.avx512.mask.load.pd.512 + return _mm512_load_pd(p); +} + __m512d test_mm512_set1_pd(double d) { // CHECK-LABEL: @test_mm512_set1_pd -- 2.40.0