From: Fiona Glaser
Date: Tue, 10 Mar 2009 06:37:53 +0000 (-0700)
Subject: Faster integral_init
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=acd4b2641c662bbe29795e986c58a3c47de675e9;p=libx264

Faster integral_init

palignr to avoid unaligned loads is worth it in inith, but not initv.
---

diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 89e1212a..cebe59fd 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -759,7 +759,8 @@ cglobal x264_integral_init4h_sse4, 3,4
     pxor m4, m4
 .loop:
     movdqa m0, [r1+r2]
-    movdqu m1, [r1+r2+8]
+    movdqa m1, [r1+r2+16]
+    palignr m1, m0, 8
     mpsadbw m0, m4, 0
     mpsadbw m1, m4, 0
     paddw m0, [r0+r2*2]
@@ -777,7 +778,8 @@ cglobal x264_integral_init8h_sse4, 3,4
     pxor m4, m4
 .loop:
     movdqa m0, [r1+r2]
-    movdqu m1, [r1+r2+8]
+    movdqa m1, [r1+r2+16]
+    palignr m1, m0, 8
     movdqa m2, m0
     movdqa m3, m1
     mpsadbw m0, m4, 0