TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl")
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2 (__m128 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
- (__v16sf)_mm512_undefined_ps(),
- (__mmask16) -1);
+ return (__m512)__builtin_shufflevector((__v4sf)__A,
+ (__v4sf)_mm_undefined_ps(),
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
- (__v16sf)
- __O, __M);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+ (__v16sf)_mm512_broadcast_f32x2(__A),
+ (__v16sf)__O);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
- (__v16sf)_mm512_setzero_ps (),
- __M);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+ (__v16sf)_mm512_broadcast_f32x2(__A),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x2 (__m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
- (__v16si)_mm512_setzero_si512(),
- (__mmask16) -1);
+ return (__m512i)__builtin_shufflevector((__v4si)__A,
+ (__v4si)_mm_undefined_si128(),
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
- (__v16si)
- __O, __M);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+ (__v16si)_mm512_broadcast_i32x2(__A),
+ (__v16si)__O);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
- (__v16si)_mm512_setzero_si512 (),
- __M);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+ (__v16si)_mm512_broadcast_i32x2(__A),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_f32x2 (__m128 __A)
{
- return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
- (__v8sf)_mm256_undefined_ps(),
- (__mmask8) -1);
+ return (__m256)__builtin_shufflevector((__v4sf)__A,
+ (__v4sf)_mm_undefined_ps(),
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
{
- return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
- (__v8sf) __O,
- __M);
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+ (__v8sf)_mm256_broadcast_f32x2(__A),
+ (__v8sf)__O);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
{
- return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
- (__v8sf) _mm256_setzero_ps (),
- __M);
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+ (__v8sf)_mm256_broadcast_f32x2(__A),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcast_i32x2 (__m128i __A)
{
- return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
- (__v4si)_mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector((__v4si)__A,
+ (__v4si)_mm_undefined_si128(),
+ 0, 1, 0, 1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
{
- return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
- (__v4si) __O,
- __M);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+ (__v4si)_mm_broadcast_i32x2(__A),
+ (__v4si)__O);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
- return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
- (__v4si) _mm_setzero_si128 (),
- __M);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+ (__v4si)_mm_broadcast_i32x2(__A),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcast_i32x2 (__m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
- (__v8si)_mm256_undefined_si256(),
- (__mmask8) -1);
+ return (__m256i)__builtin_shufflevector((__v4si)__A,
+ (__v4si)_mm_undefined_si128(),
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
- (__v8si) __O,
- __M);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+ (__v8si)_mm256_broadcast_i32x2(__A),
+ (__v8si)__O);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
- (__v8si) _mm256_setzero_si256 (),
- __M);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+ (__v8si)_mm256_broadcast_i32x2(__A),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
__m512 test_mm512_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: @test_mm512_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_f32x2(__A);
}
__m512 test_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) {
// CHECK-LABEL: @test_mm512_mask_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_broadcast_f32x2(__O, __M, __A);
}
__m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) {
// CHECK-LABEL: @test_mm512_maskz_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_broadcast_f32x2(__M, __A);
}
__m512i test_mm512_broadcast_i32x2(__m128i __A) {
// CHECK-LABEL: @test_mm512_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_i32x2(__A);
}
__m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) {
// CHECK-LABEL: @test_mm512_mask_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_broadcast_i32x2(__O, __M, __A);
}
__m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
// CHECK-LABEL: @test_mm512_maskz_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_broadcast_i32x2(__M, __A);
}
__m256 test_mm256_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: @test_mm256_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm256_broadcast_f32x2(__A);
}
__m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
// CHECK-LABEL: @test_mm256_mask_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_broadcast_f32x2(__O, __M, __A);
}
__m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
// CHECK-LABEL: @test_mm256_maskz_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_broadcast_f32x2(__M, __A);
}
__m128i test_mm_broadcast_i32x2(__m128i __A) {
// CHECK-LABEL: @test_mm_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
return _mm_broadcast_i32x2(__A);
}
__m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_broadcast_i32x2(__O, __M, __A);
}
__m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_broadcast_i32x2(__M, __A);
}
__m256i test_mm256_broadcast_i32x2(__m128i __A) {
// CHECK-LABEL: @test_mm256_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm256_broadcast_i32x2(__A);
}
__m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm256_mask_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_broadcast_i32x2(__O, __M, __A);
}
__m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm256_maskz_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_broadcast_i32x2(__M, __A);
}