From: Manman Ren Date: Wed, 23 Oct 2013 20:33:14 +0000 (+0000) Subject: Intrinsics: fix extract & insert when index is out of bound. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8484375b0fc442c704b3ec06e2e795d88591606f;p=clang Intrinsics: fix extract & insert when index is out of bound. Now, all extract & insert intrinsics should apply the correct bitwise AND mask to the index so that its higher bits are ignored. rdar://15250497 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@193267 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index 50454f2653..141c4d994b 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -435,21 +435,21 @@ static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi32(__m256i __a, int const __imm) { __v8si __b = (__v8si)__a; - return __b[__imm]; + return __b[__imm & 7]; } static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi16(__m256i __a, int const __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm]; + return __b[__imm & 15]; } static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi8(__m256i __a, int const __imm) { __v32qi __b = (__v32qi)__a; - return __b[__imm]; + return __b[__imm & 31]; } #ifdef __x86_64__ @@ -457,7 +457,7 @@ static __inline long long __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi64(__m256i __a, const int __imm) { __v4di __b = (__v4di)__a; - return __b[__imm]; + return __b[__imm & 3]; } #endif diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h index 5b6db6c445..53b3ccb431 100644 --- a/lib/Headers/smmintrin.h +++ b/lib/Headers/smmintrin.h @@ -197,7 +197,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) #define _mm_extract_ps(X, N) (__extension__ \ ({ union { int __i; float __f; } __t; \ __v4sf __a = (__v4sf)(X); \ - __t.__f = __a[N]; \ + __t.__f = __a[(N) & 3]; \ __t.__i;})) /* Miscellaneous insert and extract macros. 
*/ @@ -215,14 +215,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /* Insert int into packed integer array at index. */ #define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ - __a[(N)] = (I); \ + __a[(N) & 15] = (I); \ __a;})) #define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ - __a[(N)] = (I); \ + __a[(N) & 3] = (I); \ __a;})) #ifdef __x86_64__ #define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ - __a[(N)] = (I); \ + __a[(N) & 1] = (I); \ __a;})) #endif /* __x86_64__ */ @@ -231,12 +231,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) */ #define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ (int)(unsigned char) \ - __a[(N)];})) + __a[(N) & 15];})) #define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ - __a[(N)];})) + __a[(N) & 3];})) #ifdef __x86_64__ #define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ - __a[(N)];})) + __a[(N) & 1];})) #endif /* __x86_64 */ /* SSE4 128-bit Packed Integer Comparisons. 
*/ diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c index 0e5a741bcf..c88946fe85 100644 --- a/test/CodeGen/avx-builtins.c +++ b/test/CodeGen/avx-builtins.c @@ -93,3 +93,21 @@ int test_mm_cmpistrz(__m128i A, __m128i B) { // CHECK: @llvm.x86.sse42.pcmpistriz128 return _mm_cmpistrz(A, B, 7); } + +int test_extract_epi32(__m256i __a) { + // CHECK-LABEL: @test_extract_epi32 + // CHECK: extractelement <8 x i32> %{{.*}}, i32 0 + return _mm256_extract_epi32(__a, 8); +} + +int test_extract_epi16(__m256i __a) { + // CHECK-LABEL: @test_extract_epi16 + // CHECK: extractelement <16 x i16> %{{.*}}, i32 0 + return _mm256_extract_epi16(__a, 16); +} + +int test_extract_epi8(__m256i __a) { + // CHECK-LABEL: @test_extract_epi8 + // CHECK: extractelement <32 x i8> %{{.*}}, i32 0 + return _mm256_extract_epi8(__a, 32); +} diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c index cee9b3c2a5..1f5cb8e9e1 100644 --- a/test/CodeGen/sse-builtins.c +++ b/test/CodeGen/sse-builtins.c @@ -213,3 +213,27 @@ void test_extract_epi16(__m128i __a) { // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]] _mm_extract_epi16(__a, 8); } + +int test_extract_ps(__m128i __a) { + // CHECK-LABEL: @test_extract_ps + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + return _mm_extract_ps(__a, 4); +} + +int test_extract_epi8(__m128i __a) { + // CHECK-LABEL: @test_extract_epi8 + // CHECK: extractelement <16 x i8> %{{.*}}, i32 0 + return _mm_extract_epi8(__a, 16); +} + +int test_extract_epi32(__m128i __a) { + // CHECK-LABEL: @test_extract_epi32 + // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 + return _mm_extract_epi32(__a, 4); +} + +void test_insert_epi32(__m128i __a, int b) { + // CHECK-LABEL: @test_insert_epi32 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0 + _mm_insert_epi32(__a, b, 4); +}