From: Loren Merritt
Date: Fri, 21 Mar 2008 01:35:54 +0000 (-0600)
Subject: use x264_mc_copy_w16_sse2 in mc.copy, it was previously only in mc_luma
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ead697cad4c2090255fccecbded84346fd398075;p=libx264

use x264_mc_copy_w16_sse2 in mc.copy, it was previously only in mc_luma
---

diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 0296fbd6..b262e391 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -341,14 +341,15 @@ cglobal x264_mc_copy_w16_mmx, 5,7
     jg .height_loop
     REP_RET

-cglobal x264_mc_copy_w16_sse2, 5,7
+%macro COPY_W16_SSE2 2
+cglobal %1, 5,7
     lea r6, [r3*3]
     lea r5, [r1*3]
 .height_loop:
-    movdqu xmm0, [r2]
-    movdqu xmm1, [r2+r3]
-    movdqu xmm2, [r2+r3*2]
-    movdqu xmm3, [r2+r6]
+    %2 xmm0, [r2]
+    %2 xmm1, [r2+r3]
+    %2 xmm2, [r2+r3*2]
+    %2 xmm3, [r2+r6]
     movdqa [r0], xmm0
     movdqa [r0+r1], xmm1
     movdqa [r0+r1*2], xmm2
@@ -358,6 +359,10 @@ cglobal x264_mc_copy_w16_sse2, 5,7
     sub r4d, 4
     jg .height_loop
     REP_RET
+%endmacro
+
+COPY_W16_SSE2 x264_mc_copy_w16_sse2, movdqu
+COPY_W16_SSE2 x264_mc_copy_w16_aligned_sse2, movdqa



diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index 5d855dea..9c534515 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -49,6 +49,7 @@ extern void x264_mc_copy_w4_mmx( uint8_t *, int, uint8_t *, int, int );
 extern void x264_mc_copy_w8_mmx( uint8_t *, int, uint8_t *, int, int );
 extern void x264_mc_copy_w16_mmx( uint8_t *, int, uint8_t *, int, int );
 extern void x264_mc_copy_w16_sse2( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w16_aligned_sse2( uint8_t *, int, uint8_t *, int, int );
 extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
 extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
 extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
@@ -215,6 +216,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
     pf->mc_luma = mc_luma_sse2;
     pf->get_ref = get_ref_sse2;
+    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2;
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2;
     pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2;
 }