From: Jingning Han
Date: Wed, 1 Apr 2015 16:19:13 +0000 (-0700)
Subject: Reduce required xmm number by one in block_error_fp
X-Git-Tag: v1.5.0~803^2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f2cf3c06a0835bf436bb25c5a2f28265c287ec70;p=libvpx

Reduce required xmm number by one in block_error_fp

Use 6 xmms instead of 8.

Change-Id: If976ad85d09191d2fb0565399d690f2869dbbcc7
---

diff --git a/vp9/encoder/x86/vp9_error_sse2.asm b/vp9/encoder/x86/vp9_error_sse2.asm
index 318379777..56373e897 100644
--- a/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/vp9/encoder/x86/vp9_error_sse2.asm
@@ -78,7 +78,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
 ; intptr_t block_size)
 
 INIT_XMM sse2
-cglobal block_error_fp, 3, 3, 8, uqc, dqc, size
+cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
  pxor m4, m4 ; sse accumulator
  pxor m5, m5 ; dedicated zero register
  lea uqcq, [uqcq+sizeq*2]
@@ -96,13 +96,13 @@ cglobal block_error_fp, 3, 3, 8, uqc, dqc, size
  pmaddwd m0, m0
  pmaddwd m1, m1
  ; accumulate in 64bit
- punpckldq m7, m0, m5
+ punpckldq m3, m0, m5
  punpckhdq m0, m5
- paddq m4, m7
- punpckldq m7, m1, m5
+ paddq m4, m3
+ punpckldq m3, m1, m5
  paddq m4, m0
  punpckhdq m1, m5
- paddq m4, m7
+ paddq m4, m3
  paddq m4, m1
  add sizeq, mmsize
  jl .loop