From: Loren Merritt Date: Tue, 25 Apr 2006 04:08:21 +0000 (+0000) Subject: Use sa8d instead of satd for i8x8 search. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c1f64a50b7563b737c8938ed796f46d3bad354a4;p=libx264 Use sa8d instead of satd for i8x8 search. +.01 dB, -.5% speed git-svn-id: svn://svn.videolan.org/x264/trunk@512 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/common/i386/pixel-a.asm b/common/i386/pixel-a.asm index 834841e8..e7278fd6 100644 --- a/common/i386/pixel-a.asm +++ b/common/i386/pixel-a.asm @@ -847,14 +847,14 @@ x264_pixel_satd_16x16_mmxext: %endmacro %macro SUM4x8_MM 0 - movq [spill], mm7 - MMX_ABS mm0, mm7 - MMX_ABS mm1, mm7 - MMX_ABS mm2, mm7 - MMX_ABS mm3, mm7 + movq [spill], mm6 + movq [spill+8], mm7 + MMX_ABS_TWO mm0, mm1, mm6, mm7 + MMX_ABS_TWO mm2, mm3, mm6, mm7 paddw mm0, mm2 paddw mm1, mm3 - movq mm7, [spill] + movq mm6, [spill] + movq mm7, [spill+8] MMX_ABS_TWO mm4, mm5, mm2, mm3 MMX_ABS_TWO mm6, mm7, mm2, mm3 paddw mm4, mm6 @@ -870,14 +870,14 @@ ALIGN 16 ;----------------------------------------------------------------------------- x264_pixel_sa8d_8x8_mmxext: SATD_START - sub esp, 0x68 -%define args esp+0x6c + sub esp, 0x70 +%define args esp+0x74 %define spill esp+0x60 LOAD_DIFF_4x8P 0 HADAMARD1x8 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7 movq [spill], mm0 - TRANSPOSE4x4 mm4, mm5, mm6, mm7, mm0 ; abcd-t -> adtc + TRANSPOSE4x4 mm4, mm5, mm6, mm7, mm0 movq [esp+0x00], mm4 movq [esp+0x08], mm7 movq [esp+0x10], mm0 @@ -894,13 +894,13 @@ x264_pixel_sa8d_8x8_mmxext: LOAD_DIFF_4x8P 4 HADAMARD1x8 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7 - movq [spill], mm4 - TRANSPOSE4x4 mm0, mm1, mm2, mm3, mm4 + movq [spill], mm7 + TRANSPOSE4x4 mm0, mm1, mm2, mm3, mm7 movq [esp+0x40], mm0 movq [esp+0x48], mm3 - movq [esp+0x50], mm4 + movq [esp+0x50], mm7 movq [esp+0x58], mm2 - movq mm4, [spill] + movq mm7, [spill] TRANSPOSE4x4 mm4, mm5, mm6, mm7, mm0 movq mm5, [esp+0x00] movq mm1, [esp+0x08] @@ -933,7 +933,7 @@ x264_pixel_sa8d_8x8_mmxext: mov ecx, eax ; preserve rounding for 16x16 add eax, 1 shr eax, 1 - add esp, 0x68 + add esp, 0x70 pop ebx ret %undef args diff --git a/encoder/analyse.c b/encoder/analyse.c index 819c51ff..95f11161 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -496,6 +496,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_ /* 8x8 prediction selection */ if( flags & X264_ANALYSE_I8x8 ) { + x264_pixel_cmp_t sa8d = (*h->pixf.mbcmp == *h->pixf.sad) ? h->pixf.sad[PIXEL_8x8] : h->pixf.sa8d[PIXEL_8x8]; int i_satd_thresh = a->b_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 ); int i_cost = 0; @@ -520,10 +521,8 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_ h->predict_8x8[i_mode]( p_dst_by, h->mb.i_neighbour8[idx] ); - /* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */ - i_satd = h->pixf.mbcmp[PIXEL_8x8]( p_dst_by, FDEC_STRIDE, - p_src_by, FENC_STRIDE ) - + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4); + i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE ) + + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4); COPY2_IF_LT( i_best, i_satd, a->i_predict8x8[idx], i_mode ); a->i_satd_i8x8_dir[i_mode][idx] = i_satd; @@ -588,7 +587,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_ i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE ) - + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4); + + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4); COPY2_IF_LT( i_best, i_satd, a->i_predict4x4[idx], i_mode ); }