From 86ca49033ad85660dc88f82ab721263f4d29290e Mon Sep 17 00:00:00 2001 From: Laurent Aimar Date: Fri, 23 Jul 2004 18:14:59 +0000 Subject: [PATCH] * encoder.c, analyse.c, macroblock: fixed when using a qp per MB. (Buggy for pskip and mb with null cbp luma and chroma). * dct*: fixed order of idct. git-svn-id: svn://svn.videolan.org/x264/trunk@13 df754926-b1dd-0310-bc7b-ec298dee348c --- core/dct.c | 37 +++++++++++++++++++------------------ core/i386/dct.asm | 20 ++++++++++---------- core/macroblock.c | 3 +++ encoder/analyse.c | 17 +++++++++++------ encoder/encoder.c | 2 +- encoder/macroblock.c | 12 ++++++++++-- 6 files changed, 54 insertions(+), 37 deletions(-) diff --git a/core/dct.c b/core/dct.c index 65aab0cf..6e3a16fc 100644 --- a/core/dct.c +++ b/core/dct.c @@ -197,30 +197,31 @@ static void add4x4_idct( uint8_t *p_dst, int i_dst, int16_t dct[4][4] ) for( i = 0; i < 4; i++ ) { - const int s02 = dct[0][i] + dct[2][i]; - const int d02 = dct[0][i] - dct[2][i]; - const int s13 = dct[1][i] + (dct[3][i]>>1); - const int d13 = (dct[1][i]>>1) - dct[3][i]; - - tmp[0][i] = s02 + s13; - tmp[1][i] = d02 + d13; - tmp[2][i] = d02 - d13; - tmp[3][i] = s02 - s13; + const int s02 = dct[i][0] + dct[i][2]; + const int d02 = dct[i][0] - dct[i][2]; + const int s13 = dct[i][1] + (dct[i][3]>>1); + const int d13 = (dct[i][1]>>1) - dct[i][3]; + + tmp[i][0] = s02 + s13; + tmp[i][1] = d02 + d13; + tmp[i][2] = d02 - d13; + tmp[i][3] = s02 - s13; } for( i = 0; i < 4; i++ ) { - const int s02 = tmp[i][0] + tmp[i][2]; - const int d02 = tmp[i][0] - tmp[i][2]; - const int s13 = tmp[i][1] + (tmp[i][3]>>1); - const int d13 = (tmp[i][1]>>1) - tmp[i][3]; - - d[i][0] = ( s02 + s13 + 32 ) >> 6; - d[i][1] = ( d02 + d13 + 32 ) >> 6; - d[i][2] = ( d02 - d13 + 32 ) >> 6; - d[i][3] = ( s02 - s13 + 32 ) >> 6; + const int s02 = tmp[0][i] + tmp[2][i]; + const int d02 = tmp[0][i] - tmp[2][i]; + const int s13 = tmp[1][i] + (tmp[3][i]>>1); + const int d13 = (tmp[1][i]>>1) - tmp[3][i]; + + d[0][i] = ( s02 + s13 + 
32 ) >> 6; + d[1][i] = ( d02 + d13 + 32 ) >> 6; + d[2][i] = ( d02 - d13 + 32 ) >> 6; + d[3][i] = ( s02 - s13 + 32 ) >> 6; } + for( y = 0; y < 4; y++ ) { for( x = 0; x < 4; x++ ) diff --git a/core/i386/dct.asm b/core/i386/dct.asm index 054daba7..92dbc5ae 100644 --- a/core/i386/dct.asm +++ b/core/i386/dct.asm @@ -277,14 +277,17 @@ x264_add4x4_idct_mmxext: ; Load dct coeffs mov eax, [esp+12] ; dct movq mm0, [eax+ 0] - movq mm1, [eax+ 8] - movq mm2, [eax+16] - movq mm3, [eax+24] + movq mm4, [eax+ 8] + movq mm3, [eax+16] + movq mm1, [eax+24] mov eax, [esp+ 4] ; p_dst mov ecx, [esp+ 8] ; i_dst lea edx, [ecx+ecx*2] + ; out:mm0, mm1, mm2, mm3 + MMX_TRANSPOSE mm0, mm4, mm3, mm1, mm2 + MMX_SUMSUB_BA mm2, mm0 ; mm2=s02 mm0=d02 MMX_SUMSUBD2_AB mm1, mm3, mm5, mm4 ; mm1=s13 mm4=d13 ( well 1 + 3>>1 and 1>>1 + 3) @@ -298,16 +301,13 @@ x264_add4x4_idct_mmxext: MMX_SUMSUB_BADC mm2, mm3, mm4, mm1 ; mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 - ; in: mm2, mm4, mm1, mm3 out: mm2, mm3, mm0, mm1 - MMX_TRANSPOSE mm2, mm4, mm1, mm3, mm0 - MMX_ZERO mm7 movq mm6, [x264_mmx_32] - MMX_STORE_DIFF_4P mm2, mm4, mm6, mm7, [eax] - MMX_STORE_DIFF_4P mm3, mm4, mm6, mm7, [eax+ecx] - MMX_STORE_DIFF_4P mm0, mm4, mm6, mm7, [eax+ecx*2] - MMX_STORE_DIFF_4P mm1, mm4, mm6, mm7, [eax+edx] + MMX_STORE_DIFF_4P mm2, mm0, mm6, mm7, [eax] + MMX_STORE_DIFF_4P mm4, mm0, mm6, mm7, [eax+ecx] + MMX_STORE_DIFF_4P mm1, mm0, mm6, mm7, [eax+ecx*2] + MMX_STORE_DIFF_4P mm3, mm0, mm6, mm7, [eax+edx] ret diff --git a/core/macroblock.c b/core/macroblock.c index 59603f03..d03413c6 100644 --- a/core/macroblock.c +++ b/core/macroblock.c @@ -892,6 +892,9 @@ void x264_macroblock_cache_save( x264_t *h ) int i; + if( IS_SKIP( h->mb.i_type ) ) + h->mb.qp[i_mb_xy] = h->mb.i_last_qp; + h->mb.i_last_dqp = h->mb.qp[i_mb_xy] - h->mb.i_last_qp; h->mb.i_last_qp = h->mb.qp[i_mb_xy]; diff --git a/encoder/analyse.c b/encoder/analyse.c index 77e2c316..e4a7a025 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -847,9 +847,15 @@ void 
x264_macroblock_analyse( x264_t *h ) x264_mb_analysis_t analysis; int i; - /* qp TODO */ + /* qp TODO implement a nice RC */ h->mb.qp[h->mb.i_mb_xy] = x264_clip3( h->pps->i_pic_init_qp + h->sh.i_qp_delta + 0, 0, 51 ); + /* FIXME check if it's 12 */ + if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp < -12 ) + h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp - 12; + else if( h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp > 12 ) + h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp + 12; + /* init analysis */ x264_mb_analyse_init( h, &analysis, h->mb.qp[h->mb.i_mb_xy] ); @@ -871,11 +877,10 @@ void x264_macroblock_analyse( x264_t *h ) int i_cost; /* Fast P_SKIP detection */ - if( analysis.i_qp == h->mb.i_last_qp && - ( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) || - ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) || - ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) || - ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) ) ) + if( ( (i_neighbour&MB_LEFT) && h->mb.type[h->mb.i_mb_xy - 1] == P_SKIP ) || + ( (i_neighbour&MB_TOP) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] == P_SKIP ) || + ( ((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT) ) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] == P_SKIP ) || + ( (i_neighbour&MB_TOPRIGHT) && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] == P_SKIP ) ) { b_skip = x264_macroblock_probe_pskip( h ); } diff --git a/encoder/encoder.c b/encoder/encoder.c index 73ca380b..3188bb73 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -37,7 +37,7 @@ #include "macroblock.h" //#define DEBUG_MB_TYPE -//#define DEBUG_DUMP_FRAME 1 +#define DEBUG_DUMP_FRAME 1 static int64_t i_mtime_encode_frame = 0; diff --git a/encoder/macroblock.c b/encoder/macroblock.c index 353f3d4b..17bea963 100644 --- a/encoder/macroblock.c +++ b/encoder/macroblock.c @@ -724,12 +724,19 @@ void 
x264_macroblock_encode( x264_t *h ) /* store cbp */ h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma; + if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 ) + { + /* It won't change anything at the decoder side but it is needed else the + * decoder will fail to read the next QP */ + h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; + } + + /* Check for P_SKIP * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account * (if multiple mv give same result)*/ if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 && - h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 && - h->mb.qp[h->mb.i_mb_xy] == h->mb.i_last_qp ) + h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 ) { if( h->mb.cache.ref[0][x264_scan8[0]] == 0 ) { @@ -740,6 +747,7 @@ void x264_macroblock_encode( x264_t *h ) h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] ) { h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = P_SKIP; + h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */ } } } -- 2.40.0