From 5aa44810df348a2291ce9af32968283dd437f4d7 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 15 May 2016 21:26:20 +0000
Subject: [PATCH] [AVX512] Add intrinsics for 512-bit insertf32x8/insertf32x4/inserti32x4.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@269617 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/BuiltinsX86.def |  3 ++
 lib/Headers/avx512dqintrin.h        | 24 +++++++++++++++
 lib/Headers/avx512fintrin.h         | 48 +++++++++++++++++++++++++++++
 test/CodeGen/avx512dq-builtins.c    | 18 +++++++++++
 test/CodeGen/avx512f-builtins.c     | 36 ++++++++++++++++++++++
 5 files changed, 129 insertions(+)

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 71423df909..3878f7fe81 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -2150,6 +2150,7 @@ TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc","","avx512d
 TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x8_mask, "V16fV16fV8fIiV16fUs","","avx512dq")
 TARGET_BUILTIN(__builtin_ia32_insertf64x2_512_mask, "V8dV8dV2dIiV8dUc","","avx512dq")
 TARGET_BUILTIN(__builtin_ia32_inserti32x8_mask, "V16iV16iV8iIiV16iUs","","avx512dq")
 TARGET_BUILTIN(__builtin_ia32_inserti64x2_512_mask, "V8LLiV8LLiV2LLiIiV8LLiUc","","avx512dq")
@@ -2159,6 +2160,8 @@ TARGET_BUILTIN(__builtin_ia32_insertf64x2_256_mask, "V4dV4dV2dIiV4dUc","","avx51
 TARGET_BUILTIN(__builtin_ia32_inserti64x2_256_mask, "V4LLiV4LLiV2LLiIiV4LLiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_insertf32x4_256_mask, "V8fV8fV4fIiV8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_inserti32x4_256_mask, "V8iV8iV4iIiV8iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x4_mask, "V16fV16fV4fIiV16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_inserti32x4_mask, "V16iV16iV4iIiV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2diV2dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4diV4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fiV4fUc","","avx512vl")
diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h
index ab970f149f..74e920fdae 100644
--- a/lib/Headers/avx512dqintrin.h
+++ b/lib/Headers/avx512dqintrin.h
@@ -1027,6 +1027,30 @@ __builtin_ia32_extracti64x2_512_mask ((__v8di)( __A),\
                 (__mmask8) ( __U));\
 })
 
+#define _mm512_insertf32x8( __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
+                (__v8sf)( __B),\
+                ( __imm),\
+                (__v16sf) _mm512_setzero_ps (),\
+                (__mmask16) -1);\
+})
+
+#define _mm512_mask_insertf32x8( __W, __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
+                (__v8sf)( __B),\
+                ( __imm),\
+                (__v16sf)( __W),\
+                (__mmask16)( __U));\
+})
+
+#define _mm512_maskz_insertf32x8( __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
+                (__v8sf)( __B),\
+                ( __imm),\
+                (__v16sf) _mm512_setzero_ps (),\
+                (__mmask16)( __U));\
+})
+
 #define _mm512_insertf64x2( __A, __B, __imm) __extension__ ({ \
 __builtin_ia32_insertf64x2_512_mask ((__v8df)( __A),\
                 (__v2df)( __B),\
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index d70709f093..f967f6ec82 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -7427,6 +7427,54 @@ __builtin_ia32_inserti64x4_mask ((__v8di)( __A),\
                 (__mmask8)( __U));\
 })
 
+#define _mm512_insertf32x4( __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x4_mask ((__v16sf)( __A),\
+                (__v4sf)( __B),\
+                ( __imm),\
+                (__v16sf) _mm512_undefined_ps (),\
+                (__mmask16) -1);\
+})
+
+#define _mm512_mask_insertf32x4( __W, __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x4_mask ((__v16sf)( __A),\
+                (__v4sf)( __B),\
+                ( __imm),\
+                (__v16sf)( __W),\
+                (__mmask16)( __U));\
+})
+
+#define _mm512_maskz_insertf32x4( __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x4_mask ((__v16sf)( __A),\
+                (__v4sf)( __B),\
+                ( __imm),\
+                (__v16sf) _mm512_setzero_ps (),\
+                (__mmask16)( __U));\
+})
+
+#define _mm512_inserti32x4( __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_inserti32x4_mask ((__v16si)( __A),\
+                (__v4si)( __B),\
+                ( __imm),\
+                (__v16si) _mm512_setzero_si512 (),\
+                (__mmask16) -1);\
+})
+
+#define _mm512_mask_inserti32x4( __W, __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_inserti32x4_mask ((__v16si)( __A),\
+                (__v4si)( __B),\
+                ( __imm),\
+                (__v16si)( __W),\
+                (__mmask16)( __U));\
+})
+
+#define _mm512_maskz_inserti32x4( __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_inserti32x4_mask ((__v16si)( __A),\
+                (__v4si)( __B),\
+                ( __imm),\
+                (__v16si) _mm512_setzero_si512 (),\
+                (__mmask16)( __U));\
+})
+
 #define _mm512_getmant_round_pd( __A, __B, __C, __R) __extension__ ({ \
 __builtin_ia32_getmantpd512_mask ((__v8df)( __A),\
                 (__C << 2) |( __B),\
diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c
index f9cc917a9c..f9bc504a5e 100644
--- a/test/CodeGen/avx512dq-builtins.c
+++ b/test/CodeGen/avx512dq-builtins.c
@@ -946,6 +946,24 @@ __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
   return _mm512_maskz_extracti64x2_epi64(__U, __A, 3);
 }
 
+__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm512_insertf32x8
+  // CHECK: @llvm.x86.avx512.mask.insertf32x8
+  return _mm512_insertf32x8(__A, __B, 1);
+}
+
+__m512 test_mm512_mask_insertf32x8(__m512 __W, __mmask16 __U, __m512 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm512_mask_insertf32x8
+  // CHECK: @llvm.x86.avx512.mask.insertf32x8
+  return _mm512_mask_insertf32x8(__W, __U, __A, __B, 1);
+}
+
+__m512 test_mm512_maskz_insertf32x8(__mmask16 __U, __m512 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_insertf32x8
+  // CHECK: @llvm.x86.avx512.mask.insertf32x8
+  return _mm512_maskz_insertf32x8(__U, __A, __B, 1);
+}
+
 __m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm512_insertf64x2
   // CHECK: @llvm.x86.avx512.mask.insertf64x2
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 178b0fb0c1..acc04f6d91 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -4483,6 +4483,42 @@ __m512i test_mm512_maskz_inserti64x4(__mmask8 __U, __m512i __A, __m256i __B) {
   return _mm512_maskz_inserti64x4(__U, __A, __B, 1);
 }
 
+__m512 test_mm512_insertf32x4(__m512 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm512_insertf32x4
+  // CHECK: @llvm.x86.avx512.mask.insertf32x4
+  return _mm512_insertf32x4(__A, __B, 1);
+}
+
+__m512 test_mm512_mask_insertf32x4(__m512 __W, __mmask16 __U, __m512 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm512_mask_insertf32x4
+  // CHECK: @llvm.x86.avx512.mask.insertf32x4
+  return _mm512_mask_insertf32x4(__W, __U, __A, __B, 1);
+}
+
+__m512 test_mm512_maskz_insertf32x4(__mmask16 __U, __m512 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_insertf32x4
+  // CHECK: @llvm.x86.avx512.mask.insertf32x4
+  return _mm512_maskz_insertf32x4(__U, __A, __B, 1);
+}
+
+__m512i test_mm512_inserti32x4(__m512i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm512_inserti32x4
+  // CHECK: @llvm.x86.avx512.mask.inserti32x4
+  return _mm512_inserti32x4(__A, __B, 1);
+}
+
+__m512i test_mm512_mask_inserti32x4(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm512_mask_inserti32x4
+  // CHECK: @llvm.x86.avx512.mask.inserti32x4
+  return _mm512_mask_inserti32x4(__W, __U, __A, __B, 1);
+}
+
+__m512i test_mm512_maskz_inserti32x4(__mmask16 __U, __m512i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_inserti32x4
+  // CHECK: @llvm.x86.avx512.mask.inserti32x4
+  return _mm512_maskz_inserti32x4(__U, __A, __B, 1);
+}
+
 __m512d test_mm512_getmant_round_pd(__m512d __A) {
   // CHECK-LABEL: @test_mm512_getmant_round_pd
   // CHECK: @llvm.x86.avx512.mask.getmant.pd.512
-- 
2.40.0