From: Loren Merritt Date: Sat, 8 Aug 2009 14:53:27 +0000 (+0000) Subject: MB-tree fixes: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5599c4788e4ce72c04e536723075f7547deeaec3;p=libx264 MB-tree fixes: AQ was applied inconsistently, with some AQed costs compared to other non-AQed costs. Strangely enough, fixing this increases SSIM on some sources but decreases it on others. More investigation needed. Account for weighted bipred. Reduce memory, increase precision, simplify, and early terminate. --- diff --git a/common/frame.c b/common/frame.c index b642717d..90dd0705 100644 --- a/common/frame.c +++ b/common/frame.c @@ -95,7 +95,7 @@ x264_frame_t *x264_frame_new( x264_t *h ) } CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) ); memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) ); - CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) ); + CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint16_t) ); for( j = 0; j <= h->param.i_bframe+1; j++ ) for( i = 0; i <= h->param.i_bframe+1; i++ ) { diff --git a/common/frame.h b/common/frame.h index a3da4e40..f70d38ab 100644 --- a/common/frame.h +++ b/common/frame.h @@ -85,7 +85,7 @@ typedef struct float *f_qp_offset; int b_intra_calculated; uint16_t *i_intra_cost; - uint32_t *i_propagate_cost; + uint16_t *i_propagate_cost; uint16_t *i_inv_qscale_factor; /* threading */ diff --git a/encoder/slicetype.c b/encoder/slicetype.c index be32e056..7b7a4d07 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -403,41 +403,32 @@ static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b ) { x264_frame_t *refs[2] = {frames[p0],frames[p1]}; - int dist_scale_factor = p1 != p0 ? 128 : ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); + int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32; + int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] }; for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ ) { - for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ ) + int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride; + for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++, mb_index++ ) { - int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride; int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index]; - int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8; - int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index]; - /* The approximate amount of data that this block contains. */ - int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index]; - - /* Divide by 64 for per-pixel summing. */ - propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6; + int intra_cost = frames[b]->i_intra_cost[mb_index]; /* Don't propagate for an intra block. */ if( inter_cost < intra_cost ) { - int mv[2][2], list; - mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0]; - mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1]; - if( b != p1 ) - { - mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0]; - mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1]; - } - + int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index]; + /* The approximate amount of data that this block contains. */ + int propagate_amount = frames[b]->i_propagate_cost[mb_index] + ((intra_cost * frames[b]->i_inv_qscale_factor[mb_index] + 128)>>8); + propagate_amount = ((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost; + int list; /* Follow the MVs to the previous frame(s). */ for( list = 0; list < 2; list++ ) if( (lists_used >> list)&1 ) { - int x = mv[list][0]; - int y = mv[list][1]; + int x = mvs[list][mb_index][0]; + int y = mvs[list][mb_index][1]; int listamount = propagate_amount; int mbx = (x>>5)+h->mb.i_mb_x; int mby = ((y>>5)+h->mb.i_mb_y); @@ -445,10 +436,12 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in int idx1 = idx0 + 1; int idx2 = idx0 + h->mb.i_mb_stride; int idx3 = idx0 + h->mb.i_mb_stride + 1; - int idx0weight = (32-(y&31))*(32-(x&31)); - int idx1weight = (32-(y&31))*(x&31); - int idx2weight = (y&31)*(32-(x&31)); - int idx3weight = (y&31)*(x&31); + x &= 31; + y &= 31; + int idx0weight = (32-y)*(32-x); + int idx1weight = (32-y)*x; + int idx2weight = y*(32-x); + int idx3weight = y*x; /* Apply bipred weighting. */ if( lists_used == 3 ) @@ -460,21 +453,21 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in * be counted. */ if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 ) { - CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 ); - CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 ); - CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 ); - CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+512)>>10 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+512)>>10 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+512)>>10 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+512)>>10 ); } else /* Check offsets individually */ { if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 ) - CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+512)>>10 ); if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 ) - CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+512)>>10 ); if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 ) - CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+512)>>10 ); if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 ) - CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 ); + CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+512)>>10 ); } } } @@ -497,7 +490,7 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t if( last_nonb < 0 ) return; - memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); + memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) ); while( i-- > idx ) { cur_nonb = i; @@ -506,12 +499,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t if( cur_nonb < idx ) break; x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 ); - memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); + memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) ); x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb ); while( frames[i]->i_type == X264_TYPE_B && i > 0 ) { x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 ); - memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); + memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) ); x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i ); i--; }