From: Craig Topper
Date: Wed, 11 Nov 2015 08:00:41 +0000 (+0000)
Subject: [X86] Use __builtin_ia32_paddq and __builtin_ia32_psubq to implement a couple intrins...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=271fd30fa4b44d153568a573b1b447307fd1bce4;p=clang

[X86] Use __builtin_ia32_paddq and __builtin_ia32_psubq to implement a couple intrinsics that were supposed to operate on MMX registers. Otherwise we end up operating on GPRs. Throw in a test for _mm_mul_su32 while I was there.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@252711 91177308-0d34-0410-b5e6-96231b3b80d8
---

Review note (this area is ignored when the patch is applied): the three
intrinsic-call checks in test/CodeGen/sse-builtins.c were written as
"// CHECK @llvm..." without the trailing colon. FileCheck only treats
"CHECK:" as a directive, so those lines were plain comments and the new
tests verified nothing beyond their CHECK-LABEL lines. They are spelled
"// CHECK:" below. The "index" line of that file still carries the blob
ids of the original commit.

diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index cb216c07e9..114aa0f351 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -647,7 +647,7 @@ _mm_add_epi32(__m128i __a, __m128i __b)
 static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_add_si64(__m64 __a, __m64 __b)
 {
-  return __a + __b;
+  return (__m64)__builtin_ia32_paddq(__a, __b);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -779,7 +779,7 @@ _mm_sub_epi32(__m128i __a, __m128i __b)
 static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sub_si64(__m64 __a, __m64 __b)
 {
-  return __a - __b;
+  return (__m64)__builtin_ia32_psubq(__a, __b);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c
index 11a094aad7..fce57b665f 100644
--- a/test/CodeGen/sse-builtins.c
+++ b/test/CodeGen/sse-builtins.c
@@ -495,3 +495,21 @@ __m128i test_mm_undefined_si128() {
   // CHECK: ret <2 x i64> undef
   return _mm_undefined_si128();
 }
+
+__m64 test_mm_add_si64(__m64 __a, __m64 __b) {
+  // CHECK-LABEL: @test_mm_add_si64
+  // CHECK: @llvm.x86.mmx.padd.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+  return _mm_add_si64(__a, __b);
+}
+
+__m64 test_mm_sub_si64(__m64 __a, __m64 __b) {
+  // CHECK-LABEL: @test_mm_sub_si64
+  // CHECK: @llvm.x86.mmx.psub.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+  return _mm_sub_si64(__a, __b);
+}
+
+__m64 test_mm_mul_su32(__m64 __a, __m64 __b) {
+  // CHECK-LABEL: @test_mm_mul_su32
+  // CHECK: @llvm.x86.mmx.pmulu.dq(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+  return _mm_mul_su32(__a, __b);
+}