From a93705f7f9c15cdc2a1e62f6142e99f794923826 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Matthias=20R=C3=A4ncker?= Date: Thu, 20 Sep 2018 20:20:39 +0200 Subject: [PATCH] sanitizer: fix unaligned load/stores MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When built with -fsanitize=address,undefined a number of tests, such as ByteAlignmentTest.SwitchByteAlignment, produce runtime errors about unaligned 4-byte loads/stores. While normally not really a problem, this does technically violate the language standard, and it is easy to fix in a standard-conforming way using memcpy, which does not produce inferior code. Signed-off-by: Matthias Räncker Change-Id: Ie1e97ab25fe874f864df48b473569f00563181ae --- vpx_dsp/x86/loopfilter_sse2.c | 17 +++++++++-------- vpx_dsp/x86/mem_sse2.h | 11 +++++++++++ vpx_dsp/x86/variance_sse2.c | 5 +++-- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c index 28e6fd65f..1a76d670e 100644 --- a/vpx_dsp/x86/loopfilter_sse2.c +++ b/vpx_dsp/x86/loopfilter_sse2.c @@ -13,6 +13,7 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vpx_ports/emmintrin_compat.h" +#include "vpx_dsp/x86/mem_sse2.h" static INLINE __m128i abs_diff(__m128i a, __m128i b) { return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a)); @@ -212,21 +213,21 @@ void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0); - *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 0 * p - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 1 * p - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 2 * p - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 
= _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 3 * p - 2, _mm_cvtsi128_si32(ps1ps0)); - *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 4 * p - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 5 * p - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 6 * p - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 7 * p - 2, _mm_cvtsi128_si32(qs1qs0)); } void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, diff --git a/vpx_dsp/x86/mem_sse2.h b/vpx_dsp/x86/mem_sse2.h index 943d7d7fe..48dc97970 100644 --- a/vpx_dsp/x86/mem_sse2.h +++ b/vpx_dsp/x86/mem_sse2.h @@ -12,9 +12,20 @@ #define VPX_VPX_DSP_X86_MEM_SSE2_H_ #include <emmintrin.h> // SSE2 +#include <string.h> #include "./vpx_config.h" +static INLINE void storeu_uint32(void *dst, uint32_t v) { + memcpy(dst, &v, sizeof(v)); +} + +static INLINE uint32_t loadu_uint32(const void *src) { + uint32_t v; + memcpy(&v, src, sizeof(v)); + return v; +} + static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) { return _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src)); diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c index a2a13a68b..027905255 100644 --- a/vpx_dsp/x86/variance_sse2.c +++ b/vpx_dsp/x86/variance_sse2.c @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" +#include "vpx_dsp/x86/mem_sse2.h" static INLINE unsigned int add32x4_sse2(__m128i val) { val = _mm_add_epi32(val, _mm_srli_si128(val, 8)); @@ -35,8 +36,8 @@ unsigned int vpx_get_mb_ss_sse2(const int16_t *src) { } static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) { - const __m128i p0 = 
_mm_cvtsi32_si128(*(const uint32_t *)(p + 0 * stride)); - const __m128i p1 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 1 * stride)); + const __m128i p0 = _mm_cvtsi32_si128(loadu_uint32(p + 0 * stride)); + const __m128i p1 = _mm_cvtsi32_si128(loadu_uint32(p + 1 * stride)); const __m128i p01 = _mm_unpacklo_epi32(p0, p1); return _mm_unpacklo_epi8(p01, _mm_setzero_si128()); } -- 2.40.0