static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)((__v32qi)__a + (__v32qi)__b);
+ return (__m256i)((__v32qu)__a + (__v32qu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)((__v16hi)__a + (__v16hi)__b);
+ return (__m256i)((__v16hu)__a + (__v16hu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi32(__m256i __a, __m256i __b)
{
- return (__m256i)((__v8si)__a + (__v8si)__b);
+ return (__m256i)((__v8su)__a + (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi64(__m256i __a, __m256i __b)
{
- return (__m256i)((__v4di)__a + (__v4di)__b);
+ return (__m256i)((__v4du)__a + (__v4du)__b);
}
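
/* Illustrative aside, not part of the diff. A minimal caller (assumes an AVX2
   toolchain and CPU, e.g. -mavx2) showing why the lane type switches to
   unsigned: VPADDD wraps on lane overflow, and with unsigned lanes the C-level
   addition wraps the same way instead of being signed-overflow UB. */
#include <immintrin.h>
#include <limits.h>
#include <stdio.h>

int main(void) {
  __m256i a = _mm256_set1_epi32(INT_MAX);
  __m256i b = _mm256_set1_epi32(1);
  __m256i c = _mm256_add_epi32(a, b);     /* every 32-bit lane wraps to INT_MIN */
  int out[8];
  _mm256_storeu_si256((__m256i *)out, c);
  printf("%d\n", out[0]);                 /* -2147483648 */
  return 0;
}
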
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_and_si256(__m256i __a, __m256i __b)
{
- return (__m256i)((__v4di)__a & (__v4di)__b);
+ return (__m256i)((__v4du)__a & (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_andnot_si256(__m256i __a, __m256i __b)
{
- return (__m256i)(~(__v4di)__a & (__v4di)__b);
+ return (__m256i)(~(__v4du)__a & (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mullo_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)((__v16hi)__a * (__v16hi)__b);
+ return (__m256i)((__v16hu)__a * (__v16hu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mullo_epi32 (__m256i __a, __m256i __b)
{
- return (__m256i)((__v8si)__a * (__v8si)__b);
+ return (__m256i)((__v8su)__a * (__v8su)__b);
}
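
/* Illustrative aside, not part of the diff (assumes AVX2, -mavx2). mullo keeps
   only the low 32 bits of each lane product; 65535 * 65535 = 0xFFFE0001 does
   not fit in a signed int, so evaluating it on signed lanes would be UB, while
   the unsigned-lane form truncates modulo 2^32, exactly like VPMULLD. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i a = _mm256_set1_epi32(65535);
  __m256i r = _mm256_mullo_epi32(a, a);
  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  printf("0x%08x\n", (unsigned)out[0]);   /* 0xfffe0001 */
  return 0;
}
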
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_or_si256(__m256i __a, __m256i __b)
{
- return (__m256i)((__v4di)__a | (__v4di)__b);
+ return (__m256i)((__v4du)__a | (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)((__v32qi)__a - (__v32qi)__b);
+ return (__m256i)((__v32qu)__a - (__v32qu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)((__v16hi)__a - (__v16hi)__b);
+ return (__m256i)((__v16hu)__a - (__v16hu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi32(__m256i __a, __m256i __b)
{
- return (__m256i)((__v8si)__a - (__v8si)__b);
+ return (__m256i)((__v8su)__a - (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi64(__m256i __a, __m256i __b)
{
- return (__m256i)((__v4di)__a - (__v4di)__b);
+ return (__m256i)((__v4du)__a - (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_xor_si256(__m256i __a, __m256i __b)
{
- return (__m256i)((__v4di)__a ^ (__v4di)__b);
+ return (__m256i)((__v4du)__a ^ (__v4du)__b);
}
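
/* Illustrative aside, not part of the diff (assumes AVX2). The _si256 logical
   ops work on the whole 256-bit register, so the lane width chosen for the
   cast does not affect the result; andnot computes (~a) & b, a common way to
   clear bits selected by a mask. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i all  = _mm256_set1_epi8((char)0xFF);
  __m256i mask = _mm256_set1_epi32(0x0000FFFF);
  __m256i hi   = _mm256_andnot_si256(mask, all);  /* clears the low 16 bits of each 32-bit lane */
  unsigned out[8];
  _mm256_storeu_si256((__m256i *)out, hi);
  printf("0x%08x\n", out[0]);                     /* 0xffff0000 */
  return 0;
}
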
static __inline__ __m256i __DEFAULT_FN_ATTRS
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
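
/* Illustrative aside, not part of the diff (assumes AVX-512BW, -mavx512bw).
   __mmask32/__mmask64 hold one predicate bit per 16-bit/8-bit lane of a
   __m512i; the masked forms of these intrinsics (not shown in this diff, e.g.
   _mm512_mask_add_epi8) use them to select which lanes are computed. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512i src = _mm512_set1_epi8(0);
  __m512i a   = _mm512_set1_epi8(40);
  __m512i b   = _mm512_set1_epi8(2);
  __mmask64 k = 0x1ULL;                           /* enable only byte lane 0 */
  __m512i r   = _mm512_mask_add_epi8(src, k, a, b);
  char out[64];
  _mm512_storeu_si512(out, r);
  printf("%d %d\n", out[0], out[1]);              /* 42 0 */
  return 0;
}
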
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi8 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v64qi) __A + (__v64qi) __B);
+ return (__m512i) ((__v64qu) __A + (__v64qu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi8 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v64qi) __A - (__v64qi) __B);
+ return (__m512i) ((__v64qu) __A - (__v64qu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v32hi) __A + (__v32hi) __B);
+ return (__m512i) ((__v32hu) __A + (__v32hu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v32hi) __A - (__v32hi) __B);
+ return (__m512i) ((__v32hu) __A - (__v32hu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v32hi) __A * (__v32hi) __B);
+ return (__m512i) ((__v32hu) __A * (__v32hu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v8di) __A * (__v8di) __B);
+ return (__m512i) ((__v8du) __A * (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_xor_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8di) __A ^ (__v8di) __B);
+ return (__m512d) ((__v8du) __A ^ (__v8du) __B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16si) __A ^ (__v16si) __B);
+ return (__m512) ((__v16su) __A ^ (__v16su) __B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_or_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8di) __A | (__v8di) __B);
+ return (__m512d) ((__v8du) __A | (__v8du) __B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_or_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16si) __A | (__v16si) __B);
+ return (__m512) ((__v16su) __A | (__v16su) __B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_and_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8di) __A & (__v8di) __B);
+ return (__m512d) ((__v8du) __A & (__v8du) __B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_and_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16si) __A & (__v16si) __B);
+ return (__m512) ((__v16su) __A & (__v16su) __B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));
+/* Unsigned types */
+typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
+typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
+typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
+typedef unsigned int __v16su __attribute__((__vector_size__(64)));
+
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));
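
/* Illustrative aside, not part of the diff. The casts in the intrinsics below
   only reinterpret the 64 bytes with a different lane width and signedness;
   they do not convert values. Local stand-in typedefs (my names, mirroring
   __m512i and __v16su) keep the sketch self-contained. */
#include <stdio.h>

typedef long long    m512i_like __attribute__((__vector_size__(64)));
typedef unsigned int v16su_like __attribute__((__vector_size__(64)));

int main(void) {
  m512i_like x = { 0x00000001FFFFFFFFLL };  /* 64-bit lane 0; other lanes zero */
  v16su_like u = (v16su_like)x;             /* same bytes, viewed as 32-bit unsigned lanes */
  printf("0x%08x 0x%08x\n", u[0], u[1]);    /* 0xffffffff 0x00000001 on little-endian x86 */
  return 0;
}
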
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
- return (__m512i)((__v16si)__a & (__v16si)__b);
+ return (__m512i)((__v16su)__a & (__v16su)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
- return (__m512i)((__v8di)__a & (__v8di)__b);
+ return (__m512i)((__v8du)__a & (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
- return (__m512i)(~(__A) & __B);
+ return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
- return (__m512i)((__v16si)__a | (__v16si)__b);
+ return (__m512i)((__v16su)__a | (__v16su)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
- return (__m512i)((__v8di)__a | (__v8di)__b);
+ return (__m512i)((__v8du)__a | (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
- return (__m512i)((__v16si)__a ^ (__v16si)__b);
+ return (__m512i)((__v16su)__a ^ (__v16su)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
- return (__m512i)((__v8di)__a ^ (__v8di)__b);
+ return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
- return (__m512i)((__v8di)__a & (__v8di)__b);
+ return (__m512i)((__v8du)__a & (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
- return (__m512i)((__v8di)__a | (__v8di)__b);
+ return (__m512i)((__v8du)__a | (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
- return (__m512i)((__v8di)__a ^ (__v8di)__b);
+ return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
/* Arithmetic */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v8di) __A + (__v8di) __B);
+ return (__m512i) ((__v8du) __A + (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v8di) __A - (__v8di) __B);
+ return (__m512i) ((__v8du) __A - (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v16si) __A + (__v16si) __B);
+ return (__m512i) ((__v16su) __A + (__v16su) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v16si) __A - (__v16si) __B);
+ return (__m512i) ((__v16su) __A - (__v16su) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v16si) __A * (__v16si) __B);
+ return (__m512i) ((__v16su) __A * (__v16su) __B);
}
static __inline __m512i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
- return (__m256i) ((__v4di) __A * (__v4di) __B);
+ return (__m256i) ((__v4du) __A * (__v4du) __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mullo_epi64 (__m128i __A, __m128i __B) {
- return (__m128i) ((__v2di) __A * (__v2di) __B);
+ return (__m128i) ((__v2du) __A * (__v2du) __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+/* Unsigned types */
+typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));
+typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
+typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
+typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));
+
/* We need an explicitly signed variant for char. Note that this shouldn't
* appear in the interface though. */
typedef signed char __v32qs __attribute__((__vector_size__(32)));
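
/* Illustrative aside, not part of the diff: why an explicitly signed char lane
   type matters. Lane comparisons are signed or unsigned according to the
   element type, and plain 'char' can be unsigned (e.g. -funsigned-char), which
   would silently change the result; __v32qs pins the lanes to 'signed char'.
   The sketch uses a local 16-byte stand-in type (my name). */
#include <stdio.h>

typedef signed char v16qs_like __attribute__((__vector_size__(16)));

int main(void) {
  v16qs_like a = { -1, -1, -1, -1, -1, -1, -1, -1,
                   -1, -1, -1, -1, -1, -1, -1, -1 };
  v16qs_like b = { 0 };
  v16qs_like lt = a < b;        /* signed lanes: -1 < 0 is true in every lane */
  printf("%d\n", (int)lt[0]);   /* -1 (all bits set); unsigned lanes (0xFF < 0) would give 0 */
  return 0;
}
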
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_and_pd(__m256d __a, __m256d __b)
{
- return (__m256d)((__v4di)__a & (__v4di)__b);
+ return (__m256d)((__v4du)__a & (__v4du)__b);
}
/// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_and_ps(__m256 __a, __m256 __b)
{
- return (__m256)((__v8si)__a & (__v8si)__b);
+ return (__m256)((__v8su)__a & (__v8su)__b);
}
/// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double], using
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_andnot_pd(__m256d __a, __m256d __b)
{
- return (__m256d)(~(__v4di)__a & (__v4di)__b);
+ return (__m256d)(~(__v4du)__a & (__v4du)__b);
}
/// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float], using
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_andnot_ps(__m256 __a, __m256 __b)
{
- return (__m256)(~(__v8si)__a & (__v8si)__b);
+ return (__m256)(~(__v8su)__a & (__v8su)__b);
}
/// \brief Performs a bitwise OR of two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_or_pd(__m256d __a, __m256d __b)
{
- return (__m256d)((__v4di)__a | (__v4di)__b);
+ return (__m256d)((__v4du)__a | (__v4du)__b);
}
/// \brief Performs a bitwise OR of two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_or_ps(__m256 __a, __m256 __b)
{
- return (__m256)((__v8si)__a | (__v8si)__b);
+ return (__m256)((__v8su)__a | (__v8su)__b);
}
/// \brief Performs a bitwise XOR of two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_xor_pd(__m256d __a, __m256d __b)
{
- return (__m256d)((__v4di)__a ^ (__v4di)__b);
+ return (__m256d)((__v4du)__a ^ (__v4du)__b);
}
/// \brief Performs a bitwise XOR of two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_xor_ps(__m256 __a, __m256 __b)
{
- return (__m256)((__v8si)__a ^ (__v8si)__b);
+ return (__m256)((__v8su)__a ^ (__v8su)__b);
}
/* Horizontal arithmetic */
/* Unsigned types */
typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
-typedef unsigned int __v4su __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_and_pd(__m128d __a, __m128d __b)
{
- return (__m128d)((__v4si)__a & (__v4si)__b);
+ return (__m128d)((__v4su)__a & (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_andnot_pd(__m128d __a, __m128d __b)
{
- return (__m128d)(~(__v4si)__a & (__v4si)__b);
+ return (__m128d)(~(__v4su)__a & (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_or_pd(__m128d __a, __m128d __b)
{
- return (__m128d)((__v4si)__a | (__v4si)__b);
+ return (__m128d)((__v4su)__a | (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_xor_pd(__m128d __a, __m128d __b)
{
- return (__m128d)((__v4si)__a ^ (__v4si)__b);
+ return (__m128d)((__v4su)__a ^ (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi8(__m128i __a, __m128i __b)
{
- return (__m128i)((__v16qi)__a + (__v16qi)__b);
+ return (__m128i)((__v16qu)__a + (__v16qu)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)((__v8hi)__a + (__v8hi)__b);
+ return (__m128i)((__v8hu)__a + (__v8hu)__b);
}
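
/* Illustrative aside, not part of the diff (SSE2 only, baseline on x86-64).
   The 16-bit lanes wrap modulo 2^16 just like PADDW; the __v8hu cast makes
   that wrap defined C instead of signed overflow on a short lane. */
#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128i a = _mm_set1_epi16(0x7FFF);
  __m128i b = _mm_set1_epi16(1);
  __m128i c = _mm_add_epi16(a, b);      /* each lane wraps to 0x8000 */
  short out[8];
  _mm_storeu_si128((__m128i *)out, c);
  printf("%d\n", out[0]);               /* -32768 */
  return 0;
}
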
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi32(__m128i __a, __m128i __b)
{
- return (__m128i)((__v4si)__a + (__v4si)__b);
+ return (__m128i)((__v4su)__a + (__v4su)__b);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi64(__m128i __a, __m128i __b)
{
- return (__m128i)((__v2di)__a + (__v2di)__b);
+ return (__m128i)((__v2du)__a + (__v2du)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mullo_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)((__v8hi)__a * (__v8hi)__b);
+ return (__m128i)((__v8hu)__a * (__v8hu)__b);
}
/// \brief Multiplies 32-bit unsigned integer values contained in the lower bits
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi8(__m128i __a, __m128i __b)
{
- return (__m128i)((__v16qi)__a - (__v16qi)__b);
+ return (__m128i)((__v16qu)__a - (__v16qu)__b);
}
/// \brief Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)((__v8hi)__a - (__v8hi)__b);
+ return (__m128i)((__v8hu)__a - (__v8hu)__b);
}
/// \brief Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi32(__m128i __a, __m128i __b)
{
- return (__m128i)((__v4si)__a - (__v4si)__b);
+ return (__m128i)((__v4su)__a - (__v4su)__b);
}
/// \brief Subtracts signed or unsigned 64-bit integer values and writes the
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi64(__m128i __a, __m128i __b)
{
- return (__m128i)((__v2di)__a - (__v2di)__b);
+ return (__m128i)((__v2du)__a - (__v2du)__b);
}
/// \brief Subtracts corresponding 8-bit signed integer values in the input and
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_and_si128(__m128i __a, __m128i __b)
{
- return (__m128i)((__v2di)__a & (__v2di)__b);
+ return (__m128i)((__v2du)__a & (__v2du)__b);
}
/// \brief Performs a bitwise AND of two 128-bit integer vectors, using the
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_andnot_si128(__m128i __a, __m128i __b)
{
- return (__m128i)(~(__v2di)__a & (__v2di)__b);
+ return (__m128i)(~(__v2du)__a & (__v2du)__b);
}
/// \brief Performs a bitwise OR of two 128-bit integer vectors.
///
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_or_si128(__m128i __a, __m128i __b)
{
- return (__m128i)((__v2di)__a | (__v2di)__b);
+ return (__m128i)((__v2du)__a | (__v2du)__b);
}
/// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_xor_si128(__m128i __a, __m128i __b)
{
- return (__m128i)((__v2di)__a ^ (__v2di)__b);
+ return (__m128i)((__v2du)__a ^ (__v2du)__b);
}
/// \brief Left-shifts the 128-bit integer vector operand by the specified
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mullo_epi32 (__m128i __V1, __m128i __V2)
{
- return (__m128i) ((__v4si)__V1 * (__v4si)__V2);
+ return (__m128i) ((__v4su)__V1 * (__v4su)__V2);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
typedef float __v4sf __attribute__((__vector_size__(16)));
typedef float __m128 __attribute__((__vector_size__(16)));
+/* Unsigned types */
+typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+
/* This header should only be included in a hosted environment as it depends on
* a standard library to provide allocation routines. */
#if __STDC_HOSTED__
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_and_ps(__m128 __a, __m128 __b)
{
- return (__m128)((__v4si)__a & (__v4si)__b);
+ return (__m128)((__v4su)__a & (__v4su)__b);
}
/// \brief Performs a bitwise AND of two 128-bit vectors of [4 x float], using
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_andnot_ps(__m128 __a, __m128 __b)
{
- return (__m128)(~(__v4si)__a & (__v4si)__b);
+ return (__m128)(~(__v4su)__a & (__v4su)__b);
}
/// \brief Performs a bitwise OR of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_or_ps(__m128 __a, __m128 __b)
{
- return (__m128)((__v4si)__a | (__v4si)__b);
+ return (__m128)((__v4su)__a | (__v4su)__b);
}
/// \brief Performs a bitwise exclusive OR of two 128-bit vectors of
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_xor_ps(__m128 __a, __m128 __b)
{
- return (__m128)((__v4si)__a ^ (__v4si)__b);
+ return (__m128)((__v4su)__a ^ (__v4su)__b);
}
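
/* Illustrative aside, not part of the diff. '&', '|' and '^' are not defined
   for float element vectors, which is why these [4 x float] ops reinterpret
   the bits as an integer vector (__v4su). A classic use is clearing the
   IEEE-754 sign bit to compute |x|. */
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
  __m128 x       = _mm_set_ps(-1.5f, 2.0f, -0.0f, -8.25f);
  __m128 signbit = _mm_set1_ps(-0.0f);         /* only bit 31 of each lane set */
  __m128 absx    = _mm_andnot_ps(signbit, x);  /* (~sign) & x */
  float out[4];
  _mm_storeu_ps(out, absx);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 8.25 0 2 1.5 */
  return 0;
}
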
/// \brief Compares two 32-bit float values in the low-order bits of both