From: Steinar Midtskogen
Date: Tue, 4 Oct 2016 07:55:53 +0000 (+0200)
Subject: On x86 use _mm_set_epi32 when _mm_cvtsi64_si128 isn't available
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b074823863d263bdb61c39f24ccad53d15e27d5b;p=libvpx

On x86 use _mm_set_epi32 when _mm_cvtsi64_si128 isn't available

Change-Id: Ibdbd720d4f68892da6164a9849e212e759305005
---

diff --git a/aom_dsp/simd/v128_intrinsics_x86.h b/aom_dsp/simd/v128_intrinsics_x86.h
index 450499602..9a10f2055 100644
--- a/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/aom_dsp/simd/v128_intrinsics_x86.h
@@ -162,7 +162,11 @@ SIMD_INLINE v128 v128_unziphi_8(v128 a, v128 b) {
 
 SIMD_INLINE v128 v128_unziplo_8(v128 a, v128 b) {
 #if defined(__SSSE3__)
+#ifdef __x86_64__
   v128 order = _mm_cvtsi64_si128(0x0e0c0a0806040200LL);
+#else
+  v128 order = _mm_set_epi32(0, 0, 0x0e0c0a08, 0x06040200);
+#endif
   return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
                             _mm_shuffle_epi8(a, order));
 #else
@@ -176,7 +180,11 @@ SIMD_INLINE v128 v128_unziphi_16(v128 a, v128 b) {
 
 SIMD_INLINE v128 v128_unziplo_16(v128 a, v128 b) {
 #if defined(__SSSE3__)
+#ifdef __x86_64__
   v128 order = _mm_cvtsi64_si128(0x0d0c090805040100LL);
+#else
+  v128 order = _mm_set_epi32(0, 0, 0x0d0c0908, 0x05040100);
+#endif
   return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
                             _mm_shuffle_epi8(a, order));
 #else
diff --git a/aom_dsp/simd/v64_intrinsics_x86.h b/aom_dsp/simd/v64_intrinsics_x86.h
index b951492fd..09eb160f5 100644
--- a/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/aom_dsp/simd/v64_intrinsics_x86.h
@@ -47,7 +47,11 @@ SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
 }
 
 SIMD_INLINE v64 v64_from_64(uint64_t x) {
+#ifdef __x86_64__
+  return _mm_cvtsi64_si128(x);
+#else
   return _mm_set_epi32(0, 0, x >> 32, (uint32_t)x);
+#endif
 }
 
 SIMD_INLINE uint64_t v64_u64(v64 x) {
@@ -168,7 +172,7 @@ SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
 SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
 #if defined(__SSSE3__)
   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
-                          _mm_cvtsi64_si128(0x0f0d0b0907050301LL));
+                          v64_from_64(0x0f0d0b0907050301LL));
 #else
   return _mm_packus_epi16(
       _mm_unpacklo_epi64(_mm_srli_epi16(b, 8), _mm_srli_epi16(a, 8)),
@@ -179,7 +183,7 @@ SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
 SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
 #if defined(__SSSE3__)
   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
-                          _mm_cvtsi64_si128(0x0e0c0a0806040200LL));
+                          v64_from_64(0x0e0c0a0806040200LL));
 #else
   return v64_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
 #endif
@@ -188,7 +192,7 @@ SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
 SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
 #if defined(__SSSE3__)
   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
-                          _mm_cvtsi64_si128(0x0f0e0b0a07060302LL));
+                          v64_from_64(0x0f0e0b0a07060302LL));
 #else
   return _mm_packs_epi32(
       _mm_unpacklo_epi64(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16)),
@@ -199,7 +203,7 @@ SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
 SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) {
 #if defined(__SSSE3__)
   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
-                          _mm_cvtsi64_si128(0x0d0c090805040100LL));
+                          v64_from_64(0x0d0c090805040100LL));
 #else
   return v64_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
 #endif
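
Background note on the approach (a sketch, not part of the patch above): _mm_cvtsi64_si128 moves a 64-bit integer into the low half of an XMM register but is only declared for 64-bit (x86_64) targets, so 32-bit builds must assemble the same value from two 32-bit halves with _mm_set_epi32. The standalone program below only illustrates that the two constructions yield the same 128-bit value; the load_u64 helper and main() are hypothetical names, not part of the library.

/* Standalone illustration; load_u64 and main are hypothetical. */
#include <emmintrin.h> /* SSE2: _mm_set_epi32, _mm_storeu_si128,
                          _mm_cvtsi64_si128 (64-bit builds only) */
#include <stdint.h>
#include <stdio.h>

/* Load a 64-bit constant into the low 64 bits of an SSE register,
   zeroing the upper 64 bits. */
static __m128i load_u64(uint64_t x) {
#ifdef __x86_64__
  /* Direct 64-bit move; the intrinsic exists only on x86_64. */
  return _mm_cvtsi64_si128((int64_t)x);
#else
  /* 32-bit fallback: e0 is the lowest lane, so this packs lo then hi. */
  return _mm_set_epi32(0, 0, (int32_t)(x >> 32), (int32_t)(uint32_t)x);
#endif
}

int main(void) {
  uint64_t out[2];
  _mm_storeu_si128((__m128i *)out, load_u64(0x0e0c0a0806040200ULL));
  /* Expected on either build: 0e0c0a0806040200 0000000000000000 */
  printf("%016llx %016llx\n", (unsigned long long)out[0],
         (unsigned long long)out[1]);
  return 0;
}

Routing the v64 shuffle constants through v64_from_64(), as the patch does, keeps the #ifdef in one place instead of repeating it at every former _mm_cvtsi64_si128 call site.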