From: Yunqing Wang Date: Wed, 20 Nov 2013 20:52:56 +0000 (-0800) Subject: Correct ssse3 8/16-pixel wide sub-pixel filter calculation X-Git-Tag: v1.4.0~2983^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=256cf7ee7d535ee26487a7e0352655d76932afa2;p=libvpx Correct ssse3 8/16-pixel wide sub-pixel filter calculation Although no mismatch was reported for the 8/16-pixel wide sub-pixel filters in issue 661, they had similar problems that could potentially cause a mismatch. This patch fixes the calculations in HORIZx8/16 and VERTx8/16. Change-Id: I169961c9d40a20340995b7d22aafc89ccf30bfca --- diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm index 17881ed47..634fa7746 100644 --- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm +++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm @@ -158,10 +158,13 @@ pmaddubsw xmm6, k6k7 paddsw xmm0, xmm6 - paddsw xmm0, xmm2 + movdqa xmm1, xmm2 + pmaxsw xmm2, xmm4 + pminsw xmm4, xmm1 paddsw xmm0, xmm4 - paddsw xmm0, krd + paddsw xmm0, xmm2 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 @@ -243,10 +246,13 @@ pmaddubsw xmm6, k6k7 paddsw xmm0, xmm6 - paddsw xmm0, xmm2 + movdqa xmm1, xmm2 + pmaxsw xmm2, xmm4 + pminsw xmm4, xmm1 paddsw xmm0, xmm4 - paddsw xmm0, krd + paddsw xmm0, xmm2 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 %if %1 @@ -635,9 +641,13 @@ sym(vp9_filter_block1d16_v8_avg_ssse3): pmaddubsw %3, k4k5 pmaddubsw %4, k6k7 - paddsw %1, %2 paddsw %1, %4 + movdqa %4, %2 + pmaxsw %2, %3 + pminsw %3, %4 paddsw %1, %3 + paddsw %1, %2 + paddsw %1, krd psraw %1, 7 packuswb %1, %1 @@ -783,12 +793,19 @@ sym(vp9_filter_block1d16_v8_avg_ssse3): pmaddubsw xmm6, k4k5 pmaddubsw xmm7, k6k7 - paddsw xmm0, xmm1 paddsw xmm0, xmm3 + movdqa xmm3, xmm1 + pmaxsw xmm1, xmm2 + pminsw xmm2, xmm3 paddsw xmm0, xmm2 - paddsw xmm4, xmm5 + paddsw xmm0, xmm1 + paddsw xmm4, xmm7 + movdqa xmm7, xmm5 + pmaxsw xmm5, xmm6 + pminsw xmm6, xmm7 paddsw xmm4, xmm6 + paddsw xmm4, xmm5 paddsw xmm0, krd paddsw xmm4, krd