From 540d910350121895597dab8c9c219aa28c0f64e4 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Mon, 2 Jun 2014 18:48:33 -0700 Subject: [PATCH] Fix potential overflow issue in SSSE3 forward 8x8 2D-DCT The SSSE3 implementation can overflow in its second 1-D transform when all input residual pixels are close to 255. This commit fixes the issue and re-enables the unit test on the SSSE3 version. Change-Id: I0520478abdab7afd3ff2842516bec951111e9b3c --- vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm index 8723a7114..28458dcdd 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm +++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm @@ -23,6 +23,7 @@ pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1 %endmacro +TRANSFORM_COEFFS 11585, 11585 TRANSFORM_COEFFS 15137, 6270 TRANSFORM_COEFFS 16069, 3196 TRANSFORM_COEFFS 9102, 13623 @@ -83,7 +84,7 @@ SECTION .text %endmacro ; 1D forward 8x8 DCT transform -%macro FDCT8_1D 0 +%macro FDCT8_1D 1 SUM_SUB 0, 7, 9 SUM_SUB 1, 6, 9 SUM_SUB 2, 5, 9 @@ -92,14 +93,21 @@ SECTION .text SUM_SUB 0, 3, 9 SUM_SUB 1, 2, 9 SUM_SUB 6, 5, 9 +%if %1 == 0 SUM_SUB 0, 1, 9 +%endif BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 pmulhrsw m6, m12 pmulhrsw m5, m12 +%if %1 == 0 pmulhrsw m0, m12 pmulhrsw m1, m12 +%else + BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 + SWAP 0, 1 +%endif SUM_SUB 4, 5, 9 SUM_SUB 7, 6, 9 @@ -150,10 +158,10 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride psllw m7, 2 ; column transform - FDCT8_1D + FDCT8_1D 0 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - FDCT8_1D + FDCT8_1D 1 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 DIVIDE_ROUND_2X 0, 1, 9, 10 -- 2.40.0