/* Attribute sets used by the intrinsics below: always inlined, no debug info,
 * built for the "avx512vl" target. The two variants differ only in the
 * minimum vector width they declare (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
/* Narrow vector types introduced by this change: 4-byte vectors of short and
 * of char, and a 2-byte vector of char. They serve as destination types for
 * the __builtin_convertvector calls in the truncation intrinsics below. */
+typedef short __v2hi __attribute__((__vector_size__(4)));
+typedef char __v4qi __attribute__((__vector_size__(4)));
+typedef char __v2qi __attribute__((__vector_size__(2)));
+
/* Integer compare */
#define _mm_cmpeq_epi32_mask(A, B) \
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtepi32_epi8 (__m128i __A)
{
/* Removed form: __builtin_ia32_pmovdb128_mask called with an all-ones mask
 * and an undefined pass-through vector.
 * Added form: view __A as __v4si, convert it element-wise to __v4qi, then
 * shuffle that with an all-zero __v4qi into a 16-element result. Indices 0-3
 * select the converted elements; indices 4-7 select elements of the zero
 * vector, so every byte after the first four is zero. */
- return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
- (__v16qi)_mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
+ 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
/* Removed form: __builtin_ia32_pmovdb256_mask called with an all-ones mask
 * and an undefined pass-through vector.
 * Added form: view __A as __v8si, convert it element-wise to __v8qi, then
 * shuffle that with an all-zero __v8qi into a 16-element result. Indices 0-7
 * select the converted elements and indices 8-15 select the zero vector. */
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtepi32_epi8 (__m256i __A)
{
- return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
- (__v16qi)_mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v8si)__A, __v8qi),
+ (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
/* Removed form: __builtin_ia32_pmovdw128_mask called with an all-ones mask
 * and a zero pass-through vector (_mm_setzero_si128).
 * Added form: view __A as __v4si, convert it element-wise to __v4hi, then
 * shuffle that with an all-zero __v4hi into an 8-element result. Indices 0-3
 * select the converted elements and indices 4-7 select the zero vector. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtepi32_epi16 (__m128i __A)
{
- return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
- (__v8hi) _mm_setzero_si128 (),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
+ 2, 3, 4, 5, 6, 7);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
/* Removed form: __builtin_ia32_pmovqb128_mask called with an all-ones mask
 * and an undefined pass-through vector.
 * Added form: view __A as __v2di, convert it element-wise to __v2qi, then
 * shuffle that with an all-zero __v2qi into a 16-element result. Indices 0-1
 * select the converted elements; indices 2 and 3 select elements of the zero
 * vector, so the remaining fourteen bytes are zero. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtepi64_epi8 (__m128i __A)
{
- return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
- (__v16qi) _mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
/* Removed form: __builtin_ia32_pmovqb256_mask called with an all-ones mask
 * and an undefined pass-through vector.
 * Added form: view __A as __v4di, convert it element-wise to __v4qi, then
 * shuffle that with an all-zero __v4qi into a 16-element result. Indices 0-3
 * select the converted elements; indices 4-7 select elements of the zero
 * vector, so the remaining twelve bytes are zero. */
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_epi8 (__m256i __A)
{
- return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
- (__v16qi) _mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
+ 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
/* Removed form: __builtin_ia32_pmovqd128_mask called with an all-ones mask
 * and an undefined pass-through vector.
 * Added form: view __A as __v2di, convert it element-wise to __v2si, then
 * shuffle that with an all-zero __v2si into a 4-element result. Indices 0-1
 * select the converted elements and indices 2-3 select the zero vector. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtepi64_epi32 (__m128i __A)
{
- return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
- (__v4si)_mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
/* Removed form: __builtin_ia32_pmovqw128_mask called with an all-ones mask
 * and an undefined pass-through vector.
 * Added form: view __A as __v2di, convert it element-wise to __v2hi, then
 * shuffle that with an all-zero __v2hi into an 8-element result. Indices 0-1
 * select the converted elements; indices 2 and 3 select elements of the zero
 * vector, so the remaining six elements are zero. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtepi64_epi16 (__m128i __A)
{
- return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
- (__v8hi) _mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
+ 3, 3, 3, 3);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
/* Removed form: __builtin_ia32_pmovqw256_mask called with an all-ones mask
 * and an undefined pass-through vector.
 * Added form: view __A as __v4di, convert it element-wise to __v4hi, then
 * shuffle that with an all-zero __v4hi into an 8-element result. Indices 0-3
 * select the converted elements and indices 4-7 select the zero vector. */
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_epi16 (__m256i __A)
{
- return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
- (__v8hi)_mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector(
+ __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
+ 2, 3, 4, 5, 6, 7);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
// Codegen test for _mm_cvtepi32_epi8. The updated expectation is an IR trunc
// from <4 x i32> to <4 x i8> followed by a shufflevector widening to 16
// elements; the removed expectation matched the pmov.db.128 mask intrinsic.
__m128i test_mm_cvtepi32_epi8(__m128i __A) {
  // CHECK-LABEL: @test_mm_cvtepi32_epi8
- // CHECK: @llvm.x86.avx512.mask.pmov.db.128
+ // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8>
+ // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  return _mm_cvtepi32_epi8(__A);
}
// Codegen test for _mm256_cvtepi32_epi8. The updated expectation is an IR
// trunc from <8 x i32> to <8 x i8> followed by a shufflevector widening to 16
// elements; the removed expectation matched the pmov.db.256 mask intrinsic.
__m128i test_mm256_cvtepi32_epi8(__m256i __A) {
  // CHECK-LABEL: @test_mm256_cvtepi32_epi8
- // CHECK: @llvm.x86.avx512.mask.pmov.db.256
+ // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8>
+ // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  return _mm256_cvtepi32_epi8(__A);
}
// Codegen test for _mm_cvtepi32_epi16. The updated expectation is an IR trunc
// from <4 x i32> to <4 x i16> followed by a shufflevector widening to 8
// elements; the removed expectation matched the pmov.dw.128 mask intrinsic.
__m128i test_mm_cvtepi32_epi16(__m128i __A) {
  // CHECK-LABEL: @test_mm_cvtepi32_epi16
- // CHECK: @llvm.x86.avx512.mask.pmov.dw.128
+ // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16>
+ // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  return _mm_cvtepi32_epi16(__A);
}
// Codegen test for _mm_cvtepi64_epi8. The updated expectation is an IR trunc
// from <2 x i64> to <2 x i8> followed by a shufflevector widening to 16
// elements; the removed expectation matched the pmov.qb.128 mask intrinsic.
__m128i test_mm_cvtepi64_epi8(__m128i __A) {
  // CHECK-LABEL: @test_mm_cvtepi64_epi8
- // CHECK: @llvm.x86.avx512.mask.pmov.qb.128
+ // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8>
+ // CHECK: shufflevector <2 x i8> %{{.*}}, <2 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  return _mm_cvtepi64_epi8(__A);
}
// Codegen test for _mm256_cvtepi64_epi8. The updated expectation is an IR
// trunc from <4 x i64> to <4 x i8> followed by a shufflevector widening to 16
// elements; the removed expectation matched the pmov.qb.256 mask intrinsic.
__m128i test_mm256_cvtepi64_epi8(__m256i __A) {
  // CHECK-LABEL: @test_mm256_cvtepi64_epi8
- // CHECK: @llvm.x86.avx512.mask.pmov.qb.256
+ // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8>
+ // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  return _mm256_cvtepi64_epi8(__A);
}
// Codegen test for _mm_cvtepi64_epi32. The updated expectation is an IR trunc
// from <2 x i64> to <2 x i32> followed by a shufflevector widening to 4
// elements; the removed expectation matched the pmov.qd.128 mask intrinsic.
__m128i test_mm_cvtepi64_epi32(__m128i __A) {
  // CHECK-LABEL: @test_mm_cvtepi64_epi32
- // CHECK: @llvm.x86.avx512.mask.pmov.qd.128
+ // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32>
+ // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  return _mm_cvtepi64_epi32(__A);
}
// Codegen test for _mm_cvtepi64_epi16. The updated expectation is an IR trunc
// from <2 x i64> to <2 x i16> followed by a shufflevector widening to 8
// elements; the removed expectation matched the pmov.qw.128 mask intrinsic.
__m128i test_mm_cvtepi64_epi16(__m128i __A) {
  // CHECK-LABEL: @test_mm_cvtepi64_epi16
- // CHECK: @llvm.x86.avx512.mask.pmov.qw.128
+ // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16>
+ // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
  return _mm_cvtepi64_epi16(__A);
}
// Codegen test for _mm256_cvtepi64_epi16. The updated expectation is an IR
// trunc from <4 x i64> to <4 x i16> followed by a shufflevector widening to 8
// elements; the removed expectation matched the pmov.qw.256 mask intrinsic.
__m128i test_mm256_cvtepi64_epi16(__m256i __A) {
  // CHECK-LABEL: @test_mm256_cvtepi64_epi16
- // CHECK: @llvm.x86.avx512.mask.pmov.qw.256
+ // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16>
+ // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  return _mm256_cvtepi64_epi16(__A);
}