return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
}
-#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
- __m256i __a = (a); \
- __m256i __b = (b); \
- (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
+#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
+ (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (n)); })
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_and_si256(__m256i __a, __m256i __b)
(__v32qi)__M);
}
-#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_shufflevector((__v16hi)__V1, (__v16hi)__V2, \
- (((M) & 0x01) ? 16 : 0), \
- (((M) & 0x02) ? 17 : 1), \
- (((M) & 0x04) ? 18 : 2), \
- (((M) & 0x08) ? 19 : 3), \
- (((M) & 0x10) ? 20 : 4), \
- (((M) & 0x20) ? 21 : 5), \
- (((M) & 0x40) ? 22 : 6), \
- (((M) & 0x80) ? 23 : 7), \
- (((M) & 0x01) ? 24 : 8), \
- (((M) & 0x02) ? 25 : 9), \
+#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
+ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \
+ (__v16hi)(__m256i)(V2), \
+ (((M) & 0x01) ? 16 : 0), \
+ (((M) & 0x02) ? 17 : 1), \
+ (((M) & 0x04) ? 18 : 2), \
+ (((M) & 0x08) ? 19 : 3), \
+ (((M) & 0x10) ? 20 : 4), \
+ (((M) & 0x20) ? 21 : 5), \
+ (((M) & 0x40) ? 22 : 6), \
+ (((M) & 0x80) ? 23 : 7), \
+ (((M) & 0x01) ? 24 : 8), \
+ (((M) & 0x02) ? 25 : 9), \
(((M) & 0x04) ? 26 : 10), \
(((M) & 0x08) ? 27 : 11), \
(((M) & 0x10) ? 28 : 12), \
}
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \
+ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \
+ (__v8si)_mm256_set1_epi32(0), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4 + (((imm) & 0x03) >> 0), \
4 + (((imm) & 0xc0) >> 6)); })
#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \
+ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
+ (__v16hi)_mm256_set1_epi16(0), \
0, 1, 2, 3, \
4 + (((imm) & 0x03) >> 0), \
4 + (((imm) & 0x0c) >> 2), \
12 + (((imm) & 0xc0) >> 6)); })
#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \
+ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
+ (__v16hi)_mm256_set1_epi16(0), \
(imm) & 0x3,((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4, 5, 6, 7, \
}
#define _mm256_slli_si256(a, count) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
+ (__m256i)__builtin_ia32_pslldqi256((__m256i)(a), (count)*8); })
#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
}
#define _mm256_srli_si256(a, count) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
+ (__m256i)__builtin_ia32_psrldqi256((__m256i)(a), (count)*8); })
#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
}
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
- __m128i __V1 = (V1); \
- __m128i __V2 = (V2); \
- (__m128i)__builtin_shufflevector((__v4si)__V1, (__v4si)__V2, \
+ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \
+ (__v4si)(__m128i)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \
+ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
}
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
- __m256d __V = (V); \
- (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \
+ (__v4df)_mm256_setzero_pd(), \
(M) & 0x3, ((M) & 0xc) >> 2, \
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
}
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
- __m256i __V = (V); \
- (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \
+ (__v4di)_mm256_setzero_si256(), \
(M) & 0x3, ((M) & 0xc) >> 2, \
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); })
+ (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })
#define _mm256_extracti128_si256(V, M) __extension__ ({ \
(__m128i)__builtin_shufflevector( \
}
#define _mm256_round_pd(V, M) __extension__ ({ \
- __m256d __V = (V); \
- (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); })
+ (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); })
#define _mm256_round_ps(V, M) __extension__ ({ \
- __m256 __V = (V); \
- (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); })
+ (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); })
#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
}
#define _mm_permute_pd(A, C) __extension__ ({ \
- __m128d __A = (A); \
- (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \
+ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
+ (__v2df)_mm_setzero_pd(), \
(C) & 0x1, ((C) & 0x2) >> 1); })
#define _mm256_permute_pd(A, C) __extension__ ({ \
- __m256d __A = (A); \
- (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
+ (__v4df)_mm256_setzero_pd(), \
(C) & 0x1, ((C) & 0x2) >> 1, \
2 + (((C) & 0x4) >> 2), \
2 + (((C) & 0x8) >> 3)); })
#define _mm_permute_ps(A, C) __extension__ ({ \
- __m128 __A = (A); \
- (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \
+ (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
+ (__v4sf)_mm_setzero_ps(), \
(C) & 0x3, ((C) & 0xc) >> 2, \
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
#define _mm256_permute_ps(A, C) __extension__ ({ \
- __m256 __A = (A); \
- (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
(C) & 0x3, ((C) & 0xc) >> 2, \
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
4 + (((C) & 0x03) >> 0), \
4 + (((C) & 0xc0) >> 6)); })
#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
- __m256d __V1 = (V1); \
- __m256d __V2 = (V2); \
- (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })
+ (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), (M)); })
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
- __m256 __V1 = (V1); \
- __m256 __V2 = (V2); \
- (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
+ (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (M)); })
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })
+ (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), (M)); })
/* Vector Blend */
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
- __m256d __V1 = (V1); \
- __m256d __V2 = (V2); \
- (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
- __m256 __V1 = (V1); \
- __m256 __V2 = (V2); \
- (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
/* Vector Dot Product */
#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \
- __m256 __V1 = (V1); \
- __m256 __V2 = (V2); \
- (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
+ (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (M)); })
/* Vector shuffle */
#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
- __m256 __a = (a); \
- __m256 __b = (b); \
- (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \
- (mask) & 0x3, ((mask) & 0xc) >> 2, \
- (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \
- ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \
- (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), \
+ (mask) & 0x3, \
+ ((mask) & 0xc) >> 2, \
+ (((mask) & 0x30) >> 4) + 8, \
+ (((mask) & 0xc0) >> 6) + 8, \
+ ((mask) & 0x3) + 4, \
+ (((mask) & 0xc) >> 2) + 4, \
+ (((mask) & 0x30) >> 4) + 12, \
+ (((mask) & 0xc0) >> 6) + 12); })
#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
- __m256d __a = (a); \
- __m256d __b = (b); \
- (__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \
- (mask) & 0x1, \
- (((mask) & 0x2) >> 1) + 4, \
- (((mask) & 0x4) >> 2) + 2, \
- (((mask) & 0x8) >> 3) + 6); })
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), \
+ (mask) & 0x1, \
+ (((mask) & 0x2) >> 1) + 4, \
+ (((mask) & 0x4) >> 2) + 2, \
+ (((mask) & 0x8) >> 3) + 6); })
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
#define _mm_cmp_pd(a, b, c) __extension__ ({ \
- __m128d __a = (a); \
- __m128d __b = (b); \
- (__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); })
+ (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)); })
#define _mm_cmp_ps(a, b, c) __extension__ ({ \
- __m128 __a = (a); \
- __m128 __b = (b); \
- (__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); })
+ (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)); })
#define _mm256_cmp_pd(a, b, c) __extension__ ({ \
- __m256d __a = (a); \
- __m256d __b = (b); \
- (__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); })
+ (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (c)); })
#define _mm256_cmp_ps(a, b, c) __extension__ ({ \
- __m256 __a = (a); \
- __m256 __b = (b); \
- (__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); })
+ (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (c)); })
#define _mm_cmp_sd(a, b, c) __extension__ ({ \
- __m128d __a = (a); \
- __m128d __b = (b); \
- (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); })
+ (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)); })
#define _mm_cmp_ss(a, b, c) __extension__ ({ \
- __m128 __a = (a); \
- __m128 __b = (b); \
- (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
+ (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)); })
static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi32(__m256i __a, const int __imm)
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
#define _mm_cvtps_ph(a, imm) __extension__ ({ \
- __m128 __a = (a); \
- (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); })
+ (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); })
#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
- __m256 __a = (a); \
- (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
+ (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
static __inline __m128 __DEFAULT_FN_ATTRS
_mm_cvtph_ps(__m128i __a)
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
#define _mm_round_ps(X, M) __extension__ ({ \
- __m128 __X = (X); \
- (__m128) __builtin_ia32_roundps((__v4sf)__X, (M)); })
+ (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })
#define _mm_round_ss(X, Y, M) __extension__ ({ \
- __m128 __X = (X); \
- __m128 __Y = (Y); \
- (__m128) __builtin_ia32_roundss((__v4sf)__X, (__v4sf)__Y, (M)); })
+ (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)); })
#define _mm_round_pd(X, M) __extension__ ({ \
- __m128d __X = (X); \
- (__m128d) __builtin_ia32_roundpd((__v2df)__X, (M)); })
+ (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
#define _mm_round_sd(X, Y, M) __extension__ ({ \
- __m128d __X = (X); \
- __m128d __Y = (Y); \
- (__m128d) __builtin_ia32_roundsd((__v2df)__X, (__v2df)__Y, (M)); })
+ (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)); })
/* SSE4 Packed Blending Intrinsics. */
#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
- __m128d __V1 = (V1); \
- __m128d __V2 = (V2); \
- (__m128d)__builtin_shufflevector((__v2df)__V1, (__v2df)__V2, \
+ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
+ (__v2df)(__m128d)(V2), \
(((M) & 0x01) ? 2 : 0), \
(((M) & 0x02) ? 3 : 1)); })
#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
- __m128 __V1 = (V1); \
- __m128 __V2 = (V2); \
- (__m128)__builtin_shufflevector((__v4sf)__V1, (__v4sf)__V2, \
+ (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
}
#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
- __m128i __V1 = (V1); \
- __m128i __V2 = (V2); \
- (__m128i)__builtin_shufflevector((__v8hi)__V1, (__v8hi)__V2, \
+ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
+ (__v8hi)(__m128i)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
/* SSE4 Floating Point Dot Product Instructions. */
#define _mm_dp_ps(X, Y, M) __extension__ ({ \
- __m128 __X = (X); \
- __m128 __Y = (Y); \
- (__m128) __builtin_ia32_dpps((__v4sf)__X, (__v4sf)__Y, (M)); })
+ (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)); })
#define _mm_dp_pd(X, Y, M) __extension__ ({\
- __m128d __X = (X); \
- __m128d __Y = (Y); \
- (__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); })
+ (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)); })
/* SSE4 Streaming Load Hint Instruction. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
/* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \
- __m128i __X = (X); \
- __m128i __Y = (Y); \
- (__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); })
+ (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (M)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_minpos_epu16(__m128i __V)
}
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
- __m128i __a = (a); \
- __m128i __b = (b); \
- (__m128i)__builtin_ia32_palignr128((__v16qi)__a, (__v16qi)__b, (n)); })
+ (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (n)); })
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
- __m64 __a = (a); \
- __m64 __b = (b); \
- (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
+ (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a, __m128i __b)
}
#define _mm_shuffle_pi16(a, n) __extension__ ({ \
- __m64 __a = (a); \
- (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); })
+ (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)); })
static __inline__ void __DEFAULT_FN_ATTRS
_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
}
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
- __m128 __a = (a); \
- __m128 __b = (b); \
- (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \
+ (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
(mask) & 0x3, ((mask) & 0xc) >> 2, \
(((mask) & 0x30) >> 4) + 4, \
(((mask) & 0xc0) >> 6) + 4); })
}
#define _mm_roti_epi8(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); })
#define _mm_roti_epi16(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); })
#define _mm_roti_epi32(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); })
#define _mm_roti_epi64(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A, __m128i __B)
}
#define _mm_com_epu8(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (N)); })
#define _mm_com_epu16(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (N)); })
#define _mm_com_epu32(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (N)); })
#define _mm_com_epu64(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (N)); })
#define _mm_com_epi8(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (N)); })
#define _mm_com_epi16(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (N)); })
#define _mm_com_epi32(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (N)); })
#define _mm_com_epi64(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (N)); })
#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
}
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
- __m128d __X = (X); \
- __m128d __Y = (Y); \
- __m128i __C = (C); \
- (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \
- (__v2di)__C, (I)); })
+ (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(C), (I)); })
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
- __m256d __X = (X); \
- __m256d __Y = (Y); \
- __m256i __C = (C); \
- (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \
- (__v4di)__C, (I)); })
+ (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(C), (I)); })
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
- __m128 __X = (X); \
- __m128 __Y = (Y); \
- __m128i __C = (C); \
- (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \
- (__v4si)__C, (I)); })
+ (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(C), (I)); })
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
- __m256 __X = (X); \
- __m256 __Y = (Y); \
- __m256i __C = (C); \
- (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \
- (__v8si)__C, (I)); })
+ (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(C), (I)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)