                                          (__v8di)(__m512i)(B), \
                                          (int)(I), \
                                          (__v8di)(__m512i)(S), \
                                          (__mmask8)(U))
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
- _mm512_mask_shldi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (int)(I), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U))
#define _mm512_shldi_epi64(A, B, I) \
- _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (int)(I), \
+ (__v8di)_mm512_undefined_epi32(), \
+ (__mmask8)-1)
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (int)(I), \
                                          (__v16si)(__m512i)(S), \
                                          (__mmask16)(U))
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
- _mm512_mask_shldi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (int)(I), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U))
#define _mm512_shldi_epi32(A, B, I) \
- _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (int)(I), \
+ (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16)-1)
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \
                                          (__v32hi)(__m512i)(B), \
                                          (int)(I), \
                                          (__v32hi)(__m512i)(S), \
                                          (__mmask32)(U))
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
- _mm512_mask_shldi_epi16(_mm512_setzero_si512(), (U), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \
+ (__v32hi)(__m512i)(B), \
+ (int)(I), \
+ (__v32hi)_mm512_setzero_si512(), \
+ (__mmask32)(U))
#define _mm512_shldi_epi16(A, B, I) \
- _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \
+ (__v32hi)(__m512i)(B), \
+ (int)(I), \
+ (__v32hi)_mm512_undefined_epi32(), \
+ (__mmask32)-1)
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), \
                                          (int)(I), \
                                          (__v8di)(__m512i)(S), \
                                          (__mmask8)(U))
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
- _mm512_mask_shrdi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (int)(I), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U))
#define _mm512_shrdi_epi64(A, B, I) \
- _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (int)(I), \
+ (__v8di)_mm512_undefined_epi32(), \
+ (__mmask8)-1)
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (int)(I), \
                                          (__v16si)(__m512i)(S), \
                                          (__mmask16)(U))
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
- _mm512_mask_shrdi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (int)(I), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U))
#define _mm512_shrdi_epi32(A, B, I) \
- _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (int)(I), \
+ (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16)-1)
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \
                                          (__v32hi)(__m512i)(B), \
                                          (int)(I), \
                                          (__v32hi)(__m512i)(S), \
                                          (__mmask32)(U))
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
- _mm512_mask_shrdi_epi16(_mm512_setzero_si512(), (U), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \
+ (__v32hi)(__m512i)(B), \
+ (int)(I), \
+ (__v32hi)_mm512_setzero_si512(), \
+ (__mmask32)(U))
#define _mm512_shrdi_epi16(A, B, I) \
- _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
+ (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \
+ (__v32hi)(__m512i)(B), \
+ (int)(I), \
+ (__v32hi)_mm512_undefined_epi32(), \
+ (__mmask32)-1)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
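For context, the three flavors of each immediate-shift macro differ only in what lands in lanes whose mask bit is clear. A minimal usage sketch (illustrative only; it assumes <immintrin.h> and a target built with AVX512VBMI2 enabled, and uses only intrinsics defined in this patch):

#include <immintrin.h>

/* Illustrative sketch, not part of the header under review. */
static void shldi_flavors(__m512i s, __m512i a, __m512i b) {
  __mmask8 m = 0x0F;                                    /* low four lanes active */
  __m512i r0 = _mm512_shldi_epi64(a, b, 5);             /* every lane computed */
  __m512i r1 = _mm512_mask_shldi_epi64(s, m, a, b, 5);  /* inactive lanes keep s */
  __m512i r2 = _mm512_maskz_shldi_epi64(m, a, b, 5);    /* inactive lanes zeroed */
  (void)r0; (void)r1; (void)r2;
}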
#define _mm256_mask_shldi_epi64(S, U, A, B, I) \
  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), \
                                          (int)(I), \
                                          (__v4di)(__m256i)(S), \
                                          (__mmask8)(U))
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
- _mm256_mask_shldi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(I), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U))
#define _mm256_shldi_epi64(A, B, I) \
- _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(I), \
+ (__v4di)_mm256_undefined_si256(), \
+ (__mmask8)-1)
#define _mm_mask_shldi_epi64(S, U, A, B, I) \
  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \
                                          (__v2di)(__m128i)(B), \
                                          (int)(I), \
                                          (__v2di)(__m128i)(S), \
                                          (__mmask8)(U))
#define _mm_maskz_shldi_epi64(U, A, B, I) \
- _mm_mask_shldi_epi64(_mm_setzero_si128(), (U), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(I), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm_shldi_epi64(A, B, I) \
- _mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(I), \
+ (__v2di)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm256_mask_shldi_epi32(S, U, A, B, I) \
  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), \
                                          (int)(I), \
                                          (__v8si)(__m256i)(S), \
                                          (__mmask8)(U))
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
- _mm256_mask_shldi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (int)(I), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U))
#define _mm256_shldi_epi32(A, B, I) \
- _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (int)(I), \
+ (__v8si)_mm256_undefined_si256(), \
+ (__mmask8)-1)
#define _mm_mask_shldi_epi32(S, U, A, B, I) \
  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \
                                          (__v4si)(__m128i)(B), \
                                          (int)(I), \
                                          (__v4si)(__m128i)(S), \
                                          (__mmask8)(U))
#define _mm_maskz_shldi_epi32(U, A, B, I) \
- _mm_mask_shldi_epi32(_mm_setzero_si128(), (U), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (int)(I), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm_shldi_epi32(A, B, I) \
- _mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (int)(I), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \
                                          (__v16hi)(__m256i)(B), \
                                          (int)(I), \
                                          (__v16hi)(__m256i)(S), \
                                          (__mmask16)(U))
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
- _mm256_mask_shldi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \
+ (__v16hi)(__m256i)(B), \
+ (int)(I), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(U))
#define _mm256_shldi_epi16(A, B, I) \
- _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \
+ (__v16hi)(__m256i)(B), \
+ (int)(I), \
+ (__v16hi)_mm256_undefined_si256(), \
+ (__mmask16)-1)
#define _mm_mask_shldi_epi16(S, U, A, B, I) \
  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \
                                          (__v8hi)(__m128i)(B), \
                                          (int)(I), \
                                          (__v8hi)(__m128i)(S), \
                                          (__mmask8)(U))
#define _mm_maskz_shldi_epi16(U, A, B, I) \
- _mm_mask_shldi_epi16(_mm_setzero_si128(), (U), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), \
+ (int)(I), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm_shldi_epi16(A, B, I) \
- _mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), \
+ (int)(I), \
+ (__v8hi)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), \
                                          (int)(I), \
                                          (__v4di)(__m256i)(S), \
                                          (__mmask8)(U))
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
- _mm256_mask_shrdi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(I), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U))
#define _mm256_shrdi_epi64(A, B, I) \
- _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(I), \
+ (__v4di)_mm256_undefined_si256(), \
+ (__mmask8)-1)
#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \
                                          (__v2di)(__m128i)(B), \
                                          (int)(I), \
                                          (__v2di)(__m128i)(S), \
                                          (__mmask8)(U))
#define _mm_maskz_shrdi_epi64(U, A, B, I) \
- _mm_mask_shrdi_epi64(_mm_setzero_si128(), (U), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(I), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm_shrdi_epi64(A, B, I) \
- _mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(I), \
+ (__v2di)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), \
                                          (int)(I), \
                                          (__v8si)(__m256i)(S), \
                                          (__mmask8)(U))
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
- _mm256_mask_shrdi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (int)(I), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U))
#define _mm256_shrdi_epi32(A, B, I) \
- _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (int)(I), \
+ (__v8si)_mm256_undefined_si256(), \
+ (__mmask8)-1)
#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \
                                          (__v4si)(__m128i)(B), \
                                          (int)(I), \
                                          (__v4si)(__m128i)(S), \
                                          (__mmask8)(U))
#define _mm_maskz_shrdi_epi32(U, A, B, I) \
- _mm_mask_shrdi_epi32(_mm_setzero_si128(), (U), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (int)(I), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm_shrdi_epi32(A, B, I) \
- _mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (int)(I), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \
                                          (__v16hi)(__m256i)(B), \
                                          (int)(I), \
                                          (__v16hi)(__m256i)(S), \
                                          (__mmask16)(U))
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
- _mm256_mask_shrdi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \
+ (__v16hi)(__m256i)(B), \
+ (int)(I), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(U))
#define _mm256_shrdi_epi16(A, B, I) \
- _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+ (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \
+ (__v16hi)(__m256i)(B), \
+ (int)(I), \
+ (__v16hi)_mm256_undefined_si256(), \
+ (__mmask16)-1)
#define _mm_mask_shrdi_epi16(S, U, A, B, I) \
  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \
                                          (__v8hi)(__m128i)(B), \
                                          (int)(I), \
                                          (__v8hi)(__m128i)(S), \
                                          (__mmask8)(U))
#define _mm_maskz_shrdi_epi16(U, A, B, I) \
- _mm_mask_shrdi_epi16(_mm_setzero_si128(), (U), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), \
+ (int)(I), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm_shrdi_epi16(A, B, I) \
- _mm_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+ (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), \
+ (int)(I), \
+ (__v8hi)_mm_undefined_si128(), \
+ (__mmask8)-1)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
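For reference, a scalar model of what one 64-bit lane of the shldi/shrdi operations computes, as a sketch assuming the documented VBMI2 behavior: shldi keeps the upper half of the concatenation a:b shifted left, shrdi keeps the lower half of b:a shifted right, with the count taken modulo the lane width. The helper names below are illustrative, not part of any header:

#include <stdint.h>

/* Sketch of one 64-bit lane; helper names are illustrative. */
static uint64_t shld64(uint64_t a, uint64_t b, unsigned imm) {
  imm &= 63;                       /* count is taken modulo the lane width */
  return imm ? (a << imm) | (b >> (64 - imm)) : a;   /* high half of a:b << imm */
}

static uint64_t shrd64(uint64_t a, uint64_t b, unsigned imm) {
  imm &= 63;
  return imm ? (a >> imm) | (b << (64 - imm)) : a;   /* low half of b:a >> imm */
}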