From: Craig Topper Date: Sat, 4 Jun 2016 05:43:41 +0000 (+0000) Subject: [X86] Use unsigned types for vector arithmetic in intrinsics to avoid undefined behav... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=566f08cc02bd05a1f743538fc0f8a47559938b1d;p=clang [X86] Use unsigned types for vector arithmetic in intrinsics to avoid undefined behavior for signed integer overflow. This is really only needed for addition, subtraction, and multiplication, but I did the bitwise ops too for overall consistency. Clang currently doesn't set NSW for signed vector operations so the undefined behavior shouldn't happen today. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@271778 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h index 029199e4cd..9df915a10b 100644 --- a/lib/Headers/avx2intrin.h +++ b/lib/Headers/avx2intrin.h @@ -81,25 +81,25 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi8(__m256i __a, __m256i __b) { - return (__m256i)((__v32qi)__a + (__v32qi)__b); + return (__m256i)((__v32qu)__a + (__v32qu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi16(__m256i __a, __m256i __b) { - return (__m256i)((__v16hi)__a + (__v16hi)__b); + return (__m256i)((__v16hu)__a + (__v16hu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi32(__m256i __a, __m256i __b) { - return (__m256i)((__v8si)__a + (__v8si)__b); + return (__m256i)((__v8su)__a + (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi64(__m256i __a, __m256i __b) { - return (__m256i)((__v4di)__a + (__v4di)__b); + return (__m256i)((__v4du)__a + (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -133,13 +133,13 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b) { - return (__m256i)((__v4di)__a & (__v4di)__b); + return (__m256i)((__v4du)__a & (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_andnot_si256(__m256i __a, __m256i __b) { - return (__m256i)(~(__v4di)__a & (__v4di)__b); + return (__m256i)(~(__v4du)__a & (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -462,13 +462,13 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi16(__m256i __a, __m256i __b) { - return (__m256i)((__v16hi)__a * (__v16hi)__b); + return (__m256i)((__v16hu)__a * (__v16hu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi32 (__m256i __a, __m256i __b) { - return (__m256i)((__v8si)__a * (__v8si)__b); + return (__m256i)((__v8su)__a * (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -480,7 +480,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_or_si256(__m256i __a, __m256i __b) { - return (__m256i)((__v4di)__a | (__v4di)__b); + return (__m256i)((__v4du)__a | (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -658,25 +658,25 @@ _mm256_srl_epi64(__m256i __a, __m128i __count) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi8(__m256i __a, __m256i __b) { - return (__m256i)((__v32qi)__a - (__v32qi)__b); + return (__m256i)((__v32qu)__a - (__v32qu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi16(__m256i __a, __m256i __b) { - return (__m256i)((__v16hi)__a - (__v16hi)__b); + return (__m256i)((__v16hu)__a - (__v16hu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi32(__m256i __a, __m256i __b) { - return (__m256i)((__v8si)__a - (__v8si)__b); + return (__m256i)((__v8su)__a - (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi64(__m256i __a, __m256i __b) { - return (__m256i)((__v4di)__a - (__v4di)__b); + return (__m256i)((__v4du)__a - (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -754,7 +754,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_xor_si256(__m256i __a, __m256i __b) { - return (__m256i)((__v4di)__a ^ (__v4di)__b); + return (__m256i)((__v4du)__a ^ (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index 01bea5bc83..c7c8f8c360 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -30,8 +30,6 @@ typedef unsigned int __mmask32; typedef unsigned long long __mmask64; -typedef char __v64qi __attribute__ ((__vector_size__ (64))); -typedef short __v32hi __attribute__ ((__vector_size__ (64))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) @@ -348,7 +346,7 @@ _mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi8 (__m512i __A, __m512i __B) { - return (__m512i) ((__v64qi) __A + (__v64qi) __B); + return (__m512i) ((__v64qu) __A + (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -369,7 +367,7 @@ _mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi8 (__m512i __A, __m512i __B) { - return (__m512i) ((__v64qi) __A - (__v64qi) __B); + return (__m512i) ((__v64qu) __A - (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -390,7 +388,7 @@ _mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi16 (__m512i __A, __m512i __B) { - return (__m512i) ((__v32hi) __A + (__v32hi) __B); + return (__m512i) ((__v32hu) __A + (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -411,7 +409,7 @@ _mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi16 (__m512i __A, __m512i __B) { - return (__m512i) ((__v32hi) __A - (__v32hi) __B); + return (__m512i) ((__v32hu) __A - (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -432,7 +430,7 @@ _mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi16 (__m512i __A, __m512i __B) { - return (__m512i) ((__v32hi) __A * (__v32hi) __B); + return (__m512i) ((__v32hu) __A * (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h index 72ce415357..e3ceffabb8 100644 --- a/lib/Headers/avx512dqintrin.h +++ b/lib/Headers/avx512dqintrin.h @@ -33,7 +33,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi64 (__m512i __A, __m512i __B) { - return (__m512i) ((__v8di) __A * (__v8di) __B); + return (__m512i) ((__v8du) __A * (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -55,7 +55,7 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_xor_pd (__m512d __A, __m512d __B) { - return (__m512d) ((__v8di) __A ^ (__v8di) __B); + return (__m512d) ((__v8du) __A ^ (__v8du) __B); } static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -77,7 +77,7 @@ _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_xor_ps (__m512 __A, __m512 __B) { - return (__m512) ((__v16si) __A ^ (__v16si) __B); + return (__m512) ((__v16su) __A ^ (__v16su) __B); } static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -99,7 +99,7 @@ _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_or_pd (__m512d __A, __m512d __B) { - return (__m512d) ((__v8di) __A | (__v8di) __B); + return (__m512d) ((__v8du) __A | (__v8du) __B); } static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -121,7 +121,7 @@ _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_or_ps (__m512 __A, __m512 __B) { - return (__m512) ((__v16si) __A | (__v16si) __B); + return (__m512) ((__v16su) __A | (__v16su) __B); } static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -143,7 +143,7 @@ _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_and_pd (__m512d __A, __m512d __B) { - return (__m512d) ((__v8di) __A & (__v8di) __B); + return (__m512d) ((__v8du) __A & (__v8du) __B); } static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -165,7 +165,7 @@ _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_and_ps (__m512 __A, __m512 __B) { - return (__m512) ((__v16si) __A & (__v16si) __B); + return (__m512) ((__v16su) __A & (__v16su) __B); } static __inline__ __m512 __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 0aed8243b0..701d5cf17b 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -34,6 +34,12 @@ typedef float __v16sf __attribute__((__vector_size__(64))); typedef long long __v8di __attribute__((__vector_size__(64))); typedef int __v16si __attribute__((__vector_size__(64))); +/* Unsigned types */ +typedef unsigned char __v64qu __attribute__((__vector_size__(64))); +typedef unsigned short __v32hu __attribute__((__vector_size__(64))); +typedef unsigned long long __v8du __attribute__((__vector_size__(64))); +typedef unsigned int __v16su __attribute__((__vector_size__(64))); + typedef float __m512 __attribute__((__vector_size__(64))); typedef double __m512d __attribute__((__vector_size__(64))); typedef long long __m512i __attribute__((__vector_size__(64))); @@ -455,7 +461,7 @@ _mm512_castsi512_si256 (__m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b) { - return (__m512i)((__v16si)__a & (__v16si)__b); + return (__m512i)((__v16su)__a & (__v16su)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -479,7 +485,7 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi64(__m512i __a, __m512i __b) { - return (__m512i)((__v8di)__a & (__v8di)__b); + return (__m512i)((__v8du)__a & (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -503,7 +509,7 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_si512 (__m512i __A, __m512i __B) { - return (__m512i)(~(__A) & __B); + return (__m512i)(~(__v8du)(__A) & (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -565,7 +571,7 @@ _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi32(__m512i __a, __m512i __b) { - return (__m512i)((__v16si)__a | (__v16si)__b); + return (__m512i)((__v16su)__a | (__v16su)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -589,7 +595,7 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi64(__m512i __a, __m512i __b) { - return (__m512i)((__v8di)__a | (__v8di)__b); + return (__m512i)((__v8du)__a | (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -613,7 +619,7 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi32(__m512i __a, __m512i __b) { - return (__m512i)((__v16si)__a ^ (__v16si)__b); + return (__m512i)((__v16su)__a ^ (__v16su)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -637,7 +643,7 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi64(__m512i __a, __m512i __b) { - return (__m512i)((__v8di)__a ^ (__v8di)__b); + return (__m512i)((__v8du)__a ^ (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -661,19 +667,19 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_si512(__m512i __a, __m512i __b) { - return (__m512i)((__v8di)__a & (__v8di)__b); + return (__m512i)((__v8du)__a & (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_si512(__m512i __a, __m512i __b) { - return (__m512i)((__v8di)__a | (__v8di)__b); + return (__m512i)((__v8du)__a | (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_si512(__m512i __a, __m512i __b) { - return (__m512i)((__v8di)__a ^ (__v8di)__b); + return (__m512i)((__v8du)__a ^ (__v8du)__b); } /* Arithmetic */ @@ -716,7 +722,7 @@ _mm512_sub_ps(__m512 __a, __m512 __b) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi64 (__m512i __A, __m512i __B) { - return (__m512i) ((__v8di) __A + (__v8di) __B); + return (__m512i) ((__v8du) __A + (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -741,7 +747,7 @@ _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi64 (__m512i __A, __m512i __B) { - return (__m512i) ((__v8di) __A - (__v8di) __B); + return (__m512i) ((__v8du) __A - (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -766,7 +772,7 @@ _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi32 (__m512i __A, __m512i __B) { - return (__m512i) ((__v16si) __A + (__v16si) __B); + return (__m512i) ((__v16su) __A + (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -791,7 +797,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi32 (__m512i __A, __m512i __B) { - return (__m512i) ((__v16si) __A - (__v16si) __B); + return (__m512i) ((__v16su) __A - (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1462,7 +1468,7 @@ _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi32 (__m512i __A, __m512i __B) { - return (__m512i) ((__v16si) __A * (__v16si) __B); + return (__m512i) ((__v16su) __A * (__v16su) __B); } static __inline __m512i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h index e4929ca430..8187bcd6b2 100644 --- a/lib/Headers/avx512vldqintrin.h +++ b/lib/Headers/avx512vldqintrin.h @@ -33,7 +33,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi64 (__m256i __A, __m256i __B) { - return (__m256i) ((__v4di) __A * (__v4di) __B); + return (__m256i) ((__v4du) __A * (__v4du) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -55,7 +55,7 @@ _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi64 (__m128i __A, __m128i __B) { - return (__m128i) ((__v2di) __A * (__v2di) __B); + return (__m128i) ((__v2du) __A * (__v2du) __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index e46f8f4ac4..540a35b0e6 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -35,6 +35,12 @@ typedef int __v8si __attribute__ ((__vector_size__ (32))); typedef short __v16hi __attribute__ ((__vector_size__ (32))); typedef char __v32qi __attribute__ ((__vector_size__ (32))); +/* Unsigned types */ +typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))); +typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); +typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); +typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); + /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ typedef signed char __v32qs __attribute__((__vector_size__(32))); @@ -522,7 +528,7 @@ _mm256_rcp_ps(__m256 __a) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd(__m256d __a, __m256d __b) { - return (__m256d)((__v4di)__a & (__v4di)__b); + return (__m256d)((__v4du)__a & (__v4du)__b); } /// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float]. @@ -540,7 +546,7 @@ _mm256_and_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps(__m256 __a, __m256 __b) { - return (__m256)((__v8si)__a & (__v8si)__b); + return (__m256)((__v8su)__a & (__v8su)__b); } /// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double], using @@ -561,7 +567,7 @@ _mm256_and_ps(__m256 __a, __m256 __b) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd(__m256d __a, __m256d __b) { - return (__m256d)(~(__v4di)__a & (__v4di)__b); + return (__m256d)(~(__v4du)__a & (__v4du)__b); } /// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float], using @@ -582,7 +588,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps(__m256 __a, __m256 __b) { - return (__m256)(~(__v8si)__a & (__v8si)__b); + return (__m256)(~(__v8su)__a & (__v8su)__b); } /// \brief Performs a bitwise OR of two 256-bit vectors of [4 x double]. @@ -600,7 +606,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd(__m256d __a, __m256d __b) { - return (__m256d)((__v4di)__a | (__v4di)__b); + return (__m256d)((__v4du)__a | (__v4du)__b); } /// \brief Performs a bitwise OR of two 256-bit vectors of [8 x float]. @@ -618,7 +624,7 @@ _mm256_or_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps(__m256 __a, __m256 __b) { - return (__m256)((__v8si)__a | (__v8si)__b); + return (__m256)((__v8su)__a | (__v8su)__b); } /// \brief Performs a bitwise XOR of two 256-bit vectors of [4 x double]. @@ -636,7 +642,7 @@ _mm256_or_ps(__m256 __a, __m256 __b) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd(__m256d __a, __m256d __b) { - return (__m256d)((__v4di)__a ^ (__v4di)__b); + return (__m256d)((__v4du)__a ^ (__v4du)__b); } /// \brief Performs a bitwise XOR of two 256-bit vectors of [8 x float]. @@ -654,7 +660,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps(__m256 __a, __m256 __b) { - return (__m256)((__v8si)__a ^ (__v8si)__b); + return (__m256)((__v8su)__a ^ (__v8su)__b); } /* Horizontal arithmetic */ diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index 04cca74a42..c9fdda8191 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -37,7 +37,6 @@ typedef char __v16qi __attribute__((__vector_size__(16))); /* Unsigned types */ typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16))); -typedef unsigned int __v4su __attribute__((__vector_size__(16))); typedef unsigned short __v8hu __attribute__((__vector_size__(16))); typedef unsigned char __v16qu __attribute__((__vector_size__(16))); @@ -142,25 +141,25 @@ _mm_max_pd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b) { - return (__m128d)((__v4si)__a & (__v4si)__b); + return (__m128d)((__v4su)__a & (__v4su)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b) { - return (__m128d)(~(__v4si)__a & (__v4si)__b); + return (__m128d)(~(__v4su)__a & (__v4su)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b) { - return (__m128d)((__v4si)__a | (__v4si)__b); + return (__m128d)((__v4su)__a | (__v4su)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b) { - return (__m128d)((__v4si)__a ^ (__v4si)__b); + return (__m128d)((__v4su)__a ^ (__v4su)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -643,19 +642,19 @@ _mm_storel_pd(double *__dp, __m128d __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b) { - return (__m128i)((__v16qi)__a + (__v16qi)__b); + return (__m128i)((__v16qu)__a + (__v16qu)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b) { - return (__m128i)((__v8hi)__a + (__v8hi)__b); + return (__m128i)((__v8hu)__a + (__v8hu)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b) { - return (__m128i)((__v4si)__a + (__v4si)__b); + return (__m128i)((__v4su)__a + (__v4su)__b); } static __inline__ __m64 __DEFAULT_FN_ATTRS @@ -667,7 +666,7 @@ _mm_add_si64(__m64 __a, __m64 __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b) { - return (__m128i)((__v2di)__a + (__v2di)__b); + return (__m128i)((__v2du)__a + (__v2du)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -764,7 +763,7 @@ _mm_mulhi_epu16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b) { - return (__m128i)((__v8hi)__a * (__v8hi)__b); + return (__m128i)((__v8hu)__a * (__v8hu)__b); } /// \brief Multiplies 32-bit unsigned integer values contained in the lower bits @@ -842,7 +841,7 @@ _mm_sad_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b) { - return (__m128i)((__v16qi)__a - (__v16qi)__b); + return (__m128i)((__v16qu)__a - (__v16qu)__b); } /// \brief Subtracts the corresponding 16-bit integer values in the operands. @@ -860,7 +859,7 @@ _mm_sub_epi8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b) { - return (__m128i)((__v8hi)__a - (__v8hi)__b); + return (__m128i)((__v8hu)__a - (__v8hu)__b); } /// \brief Subtracts the corresponding 32-bit integer values in the operands. @@ -878,7 +877,7 @@ _mm_sub_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b) { - return (__m128i)((__v4si)__a - (__v4si)__b); + return (__m128i)((__v4su)__a - (__v4su)__b); } /// \brief Subtracts signed or unsigned 64-bit integer values and writes the @@ -915,7 +914,7 @@ _mm_sub_si64(__m64 __a, __m64 __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b) { - return (__m128i)((__v2di)__a - (__v2di)__b); + return (__m128i)((__v2du)__a - (__v2du)__b); } /// \brief Subtracts corresponding 8-bit signed integer values in the input and @@ -1015,7 +1014,7 @@ _mm_subs_epu16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b) { - return (__m128i)((__v2di)__a & (__v2di)__b); + return (__m128i)((__v2du)__a & (__v2du)__b); } /// \brief Performs a bitwise AND of two 128-bit integer vectors, using the @@ -1035,7 +1034,7 @@ _mm_and_si128(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b) { - return (__m128i)(~(__v2di)__a & (__v2di)__b); + return (__m128i)(~(__v2du)__a & (__v2du)__b); } /// \brief Performs a bitwise OR of two 128-bit integer vectors. /// @@ -1052,7 +1051,7 @@ _mm_andnot_si128(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b) { - return (__m128i)((__v2di)__a | (__v2di)__b); + return (__m128i)((__v2du)__a | (__v2du)__b); } /// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors. @@ -1070,7 +1069,7 @@ _mm_or_si128(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b) { - return (__m128i)((__v2di)__a ^ (__v2di)__b); + return (__m128i)((__v2du)__a ^ (__v2du)__b); } /// \brief Left-shifts the 128-bit integer vector operand by the specified diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h index 37abb7001d..e48ab034f4 100644 --- a/lib/Headers/smmintrin.h +++ b/lib/Headers/smmintrin.h @@ -121,7 +121,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32 (__m128i __V1, __m128i __V2) { - return (__m128i) ((__v4si)__V1 * (__v4si)__V2); + return (__m128i) ((__v4su)__V1 * (__v4su)__V2); } static __inline__ __m128i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h index 2a048c880f..937713cad5 100644 --- a/lib/Headers/xmmintrin.h +++ b/lib/Headers/xmmintrin.h @@ -30,6 +30,9 @@ typedef int __v4si __attribute__((__vector_size__(16))); typedef float __v4sf __attribute__((__vector_size__(16))); typedef float __m128 __attribute__((__vector_size__(16))); +/* Unsigned types */ +typedef unsigned int __v4su __attribute__((__vector_size__(16))); + /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. */ #if __STDC_HOSTED__ @@ -414,7 +417,7 @@ _mm_max_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_and_ps(__m128 __a, __m128 __b) { - return (__m128)((__v4si)__a & (__v4si)__b); + return (__m128)((__v4su)__a & (__v4su)__b); } /// \brief Performs a bitwise AND of two 128-bit vectors of [4 x float], using @@ -436,7 +439,7 @@ _mm_and_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_andnot_ps(__m128 __a, __m128 __b) { - return (__m128)(~(__v4si)__a & (__v4si)__b); + return (__m128)(~(__v4su)__a & (__v4su)__b); } /// \brief Performs a bitwise OR of two 128-bit vectors of [4 x float]. @@ -454,7 +457,7 @@ _mm_andnot_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_or_ps(__m128 __a, __m128 __b) { - return (__m128)((__v4si)__a | (__v4si)__b); + return (__m128)((__v4su)__a | (__v4su)__b); } /// \brief Performs a bitwise exclusive OR of two 128-bit vectors of @@ -473,7 +476,7 @@ _mm_or_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_xor_ps(__m128 __a, __m128 __b) { - return (__m128)((__v4si)__a ^ (__v4si)__b); + return (__m128)((__v4su)__a ^ (__v4su)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both