TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pshufd512_mask, "V16iV16iIiV16iUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_pshufd256_mask, "V8iV8iIiV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pshufd128_mask, "V4iV4iIiV4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f")
}
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
- (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \
- (__v16si)_mm512_undefined_epi32(), \
- (__mmask16)-1); })
+ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ 0 + (((I) & 0x03) >> 0), \
+ 0 + (((I) & 0x0c) >> 2), \
+ 0 + (((I) & 0x30) >> 4), \
+ 0 + (((I) & 0xc0) >> 6), \
+ 4 + (((I) & 0x03) >> 0), \
+ 4 + (((I) & 0x0c) >> 2), \
+ 4 + (((I) & 0x30) >> 4), \
+ 4 + (((I) & 0xc0) >> 6), \
+ 8 + (((I) & 0x03) >> 0), \
+ 8 + (((I) & 0x0c) >> 2), \
+ 8 + (((I) & 0x30) >> 4), \
+ 8 + (((I) & 0xc0) >> 6), \
+ 12 + (((I) & 0x03) >> 0), \
+ 12 + (((I) & 0x0c) >> 2), \
+ 12 + (((I) & 0x30) >> 4), \
+ 12 + (((I) & 0xc0) >> 6)); })
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
- (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_epi32((A), (I)), \
+ (__v16si)(__m512i)(W)); })
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
- (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_epi32((A), (I)), \
+ (__v16si)_mm512_setzero_si512()); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
}
#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
- (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_epi32((A), (I)), \
+ (__v8si)(__m256i)(W)); })
#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
- (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_epi32((A), (I)), \
+ (__v8si)_mm256_setzero_si256()); })
#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
- (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \
- (__v4si)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shuffle_epi32((A), (I)), \
+ (__v4si)(__m128i)(W)); })
#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
- (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)(U)); })
-
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shuffle_epi32((A), (I)), \
+ (__v4si)_mm_setzero_si128()); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
case X86::BI__builtin_ia32_fpclasspd512_mask:
case X86::BI__builtin_ia32_fpclasssd_mask:
case X86::BI__builtin_ia32_fpclassss_mask:
- case X86::BI__builtin_ia32_pshufd512_mask:
- case X86::BI__builtin_ia32_pshufd256_mask:
- case X86::BI__builtin_ia32_pshufd128_mask:
i = 1; l = 0; u = 255;
break;
case X86::BI__builtin_ia32_palignr:
__m512i test_mm512_shuffle_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_shuffle_epi32
- // CHECK: @llvm.x86.avx512.mask.pshuf.d.512
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
return _mm512_shuffle_epi32(__A, 1);
}
__m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_shuffle_epi32
- // CHECK: @llvm.x86.avx512.mask.pshuf.d.512
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_shuffle_epi32(__W, __U, __A, 1);
}
__m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_shuffle_epi32
- // CHECK: @llvm.x86.avx512.mask.pshuf.d.512
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_shuffle_epi32(__U, __A, 1);
}
// CHECK-LABEL: @test_mm512_setzero_pd
// CHECK: zeroinitializer
return _mm512_setzero_pd();
-}
\ No newline at end of file
+}
__m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_shuffle_epi32
- // CHECK: @llvm.x86.avx512.mask.pshuf.d.128
+ // CHECK: shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_shuffle_epi32(__W, __U, __A, 1);
}
__m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_shuffle_epi32
- // CHECK: @llvm.x86.avx512.mask.pshuf.d.128
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_shuffle_epi32(__U, __A, 2);
}
__m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_shuffle_epi32
- // CHECK: @llvm.x86.avx512.mask.pshuf.d.256
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_shuffle_epi32(__W, __U, __A, 2);
}
__m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_shuffle_epi32
- // CHECK: @llvm.x86.avx512.mask.pshuf.d.256
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_shuffle_epi32(__U, __A, 2);
}