BUILTIN(__builtin_ia32_pbroadcastd128, "V4iV4i", "")
BUILTIN(__builtin_ia32_pbroadcastq128, "V2LLiV2LLi", "")
BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "")
-BUILTIN(__builtin_ia32_permdf256, "V4dV4dIc", "")
BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8f", "")
-BUILTIN(__builtin_ia32_permdi256, "V4LLiV4LLiIc", "")
BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIc", "")
BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIc", "")
BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "")
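A note on the prototype strings in the two removed BUILTIN lines, offered as informal commentary rather than part of the patch:

/* Prototype-string key (informal):
 *   V4d   = vector of 4 double        V4LLi = vector of 4 long long
 *   Ic    = char immediate; the leading I means the argument must be
 *           an integer constant expression.
 * So the removed __builtin_ia32_permdf256 was, in effect,
 *   __v4df __builtin_ia32_permdf256(__v4df, constant char);
 * and that constant-char permute selector is exactly what
 * __builtin_shufflevector can express directly. */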
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
  __m256d __V = (V); \
-  (__m256d)__builtin_ia32_permdf256((__v4df)__V, (M)); })
+  (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \
+                                   (M) & 0x3, ((M) & 0xc) >> 2, \
+                                   ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
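The bit twiddling is the whole change here, so a minimal decoder sketch may help (the helper name decode_perm4x64_imm is hypothetical, not part of the patch). Note that the `(__v4df) _mm256_setzero_pd()` argument is only a placeholder: `__builtin_shufflevector` takes two source vectors, and since each 2-bit field can only yield an index of 0 through 3, the zero vector is never actually read.

/* Hypothetical helper, for illustration only: decode a permute4x64
   immediate into the four source-lane indices, mirroring the masks
   and shifts used in the macro above. */
static inline void decode_perm4x64_imm(int m, int idx[4]) {
  idx[0] = m & 0x3;          /* bits 1:0 pick result lane 0 */
  idx[1] = (m & 0xc) >> 2;   /* bits 3:2 pick result lane 1 */
  idx[2] = (m & 0x30) >> 4;  /* bits 5:4 pick result lane 2 */
  idx[3] = (m & 0xc0) >> 6;  /* bits 7:6 pick result lane 3 */
}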
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_permutevar8x32_ps(__m256 a, __m256 b)
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
  __m256i __V = (V); \
-  (__m256i)__builtin_ia32_permdi256(__V, (M)); })
+  (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \
+                                   (M) & 0x3, ((M) & 0xc) >> 2, \
+                                   ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
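These two intrinsics stay as macros rather than always_inline functions because `__builtin_shufflevector` requires its indices to be integer constant expressions; the permutevar variants above keep their builtins, since their selector is a runtime vector. A usage sketch, illustrative only:

/* 0x1B is 0b00011011: the 2-bit fields select lanes 3,2,1,0, i.e. a
   full reversal of the four 64-bit elements (v is any __m256i). */
__m256i reversed = _mm256_permute4x64_epi64(v, 0x1B);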
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
  __m256i __V1 = (V1); \
}
__m256d test_mm256_permute4x64_pd(__m256d a) {
- // CHECK: @llvm.x86.avx2.permpd
+ // CHECK: shufflevector{{.*}}<i32 1, i32 2, i32 1, i32 0>
return _mm256_permute4x64_pd(a, 25);
}
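The new pattern follows from the immediate itself: 25 is 0b00011001, whose 2-bit fields from low to high are 01, 10, 01, 00, which is exactly the shufflevector mask <i32 1, i32 2, i32 1, i32 0>.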
}
__m256i test_mm256_permute4x64_epi64(__m256i a) {
- // CHECK: @llvm.x86.avx2.permq
+ // CHECK: shufflevector{{.*}}<i32 3, i32 0, i32 2, i32 0>
return _mm256_permute4x64_epi64(a, 35);
}
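Likewise, 35 is 0b00100011: fields from low to high are 11, 00, 10, 00, giving <i32 3, i32 0, i32 2, i32 0>.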