From: Jingning Han Date: Wed, 13 Nov 2013 22:48:38 +0000 (-0800) Subject: Fix an overflow issue in SSE2 forward ADST X-Git-Tag: v1.3.0~25 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fabc7836956c8c48adf6c570957c7fddb7ec0253;p=libvpx Fix an overflow issue in SSE2 forward ADST The step that sums three input samples could potentially cause the intermediate result to go beyond the 16-bit limit when operating as the second 1-D transform. This commit fixes the issue. Change-Id: Iaf512449ac2d25ddd8a806d760afab362c62a516 --- diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index dc115018e..fefca660d 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -206,12 +206,12 @@ void fadst4_1d_sse2(__m128i *in) { const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u[8], v[8]; __m128i in7 = _mm_add_epi16(in[0], in[1]); - in7 = _mm_sub_epi16(in7, in[3]); u[0] = _mm_unpacklo_epi16(in[0], in[1]); u[1] = _mm_unpacklo_epi16(in[2], in[3]); u[2] = _mm_unpacklo_epi16(in7, kZero); u[3] = _mm_unpacklo_epi16(in[2], kZero); + u[4] = _mm_unpacklo_epi16(in[3], kZero); v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02); // s0 + s2 v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04); // s4 + s5 @@ -219,9 +219,10 @@ void fadst4_1d_sse2(__m128i *in) { v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01); // s1 - s3 v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02); // -s4 + s6 v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s4 + v[6] = _mm_madd_epi16(u[4], k__sinpi_p03_p03); u[0] = _mm_add_epi32(v[0], v[1]); - u[1] = v[2]; + u[1] = _mm_sub_epi32(v[2], v[6]); u[2] = _mm_add_epi32(v[3], v[4]); u[3] = _mm_sub_epi32(u[2], u[0]); u[4] = _mm_slli_epi32(v[5], 2);