TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIc", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIc", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "n", "avx2")
TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "n", "avx2")
TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "n", "avx2")
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute2f128_pd(V1, V2, M) \
(__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
- (__v4df)(__m256d)(V2), (M))
+ (__v4df)(__m256d)(V2), (int)(M))
/// Permutes 128-bit data values stored in two 256-bit vectors of
/// [8 x float], as specified by the immediate integer operand.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute2f128_ps(V1, V2, M) \
(__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), (M))
+ (__v8sf)(__m256)(V2), (int)(M))
/// Permutes 128-bit data values stored in two 256-bit integer vectors,
/// as specified by the immediate integer operand.
/// \returns A 256-bit integer vector containing the copied values.
#define _mm256_permute2f128_si256(V1, V2, M) \
(__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
- (__v8si)(__m256i)(V2), (M))
+ (__v8si)(__m256i)(V2), (int)(M))
/* Vector Blend */
/// Merges 64-bit double-precision data values stored in either of the