From: Fiona Glaser Date: Sat, 23 May 2009 01:40:12 +0000 (-0700) Subject: Save some memory on mv cost arrays X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1aed7cd36955e1dcd2ed3e5cd1605b0978e7e9c1;p=libx264 Save some memory on mv cost arrays Have quantizers that use the same lambda share the same cost array. --- diff --git a/encoder/analyse.c b/encoder/analyse.c index f9958694..e55b9609 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -172,44 +172,46 @@ static const int i_sub_mb_p_cost_table[4] = { static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ); -uint16_t *x264_cost_mv_fpel[52][4]; -uint16_t x264_cost_ref[52][3][33]; +/* Indexed by lambda instead of qp because, due to rounding, + * some quantizers share lambdas. This saves memory. */ +uint16_t *x264_cost_mv_fpel[92][4]; +uint16_t x264_cost_ref[92][3][33]; /* initialize an array of lambda*nbits for all possible mvs */ static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a ) { - static int16_t *p_cost_mv[52]; + static int16_t *p_cost_mv[92]; int i, j; - if( !p_cost_mv[a->i_qp] ) + if( !p_cost_mv[a->i_lambda] ) { x264_emms(); /* could be faster, but isn't called many times */ /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */ - p_cost_mv[a->i_qp] = x264_malloc( (4*4*2048 + 1) * sizeof(int16_t) ); - p_cost_mv[a->i_qp] += 2*4*2048; + p_cost_mv[a->i_lambda] = x264_malloc( (4*4*2048 + 1) * sizeof(int16_t) ); + p_cost_mv[a->i_lambda] += 2*4*2048; for( i = 0; i <= 2*4*2048; i++ ) { - p_cost_mv[a->i_qp][-i] = - p_cost_mv[a->i_qp][i] = a->i_lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f; + p_cost_mv[a->i_lambda][-i] = + p_cost_mv[a->i_lambda][i] = a->i_lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f; } for( i = 0; i < 3; i++ ) for( j = 0; j < 33; j++ ) - x264_cost_ref[a->i_qp][i][j] = i ? a->i_lambda * bs_size_te( i, j ) : 0; + x264_cost_ref[a->i_lambda][i][j] = i ? a->i_lambda * bs_size_te( i, j ) : 0; } - a->p_cost_mv = p_cost_mv[a->i_qp]; - a->p_cost_ref0 = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)]; - a->p_cost_ref1 = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)]; + a->p_cost_mv = p_cost_mv[a->i_lambda]; + a->p_cost_ref0 = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)]; + a->p_cost_ref1 = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)]; /* FIXME is this useful for all me methods? */ - if( h->param.analyse.i_me_method >= X264_ME_ESA && !x264_cost_mv_fpel[a->i_qp][0] ) + if( h->param.analyse.i_me_method >= X264_ME_ESA && !x264_cost_mv_fpel[a->i_lambda][0] ) { for( j=0; j<4; j++ ) { - x264_cost_mv_fpel[a->i_qp][j] = x264_malloc( (4*2048 + 1) * sizeof(int16_t) ); - x264_cost_mv_fpel[a->i_qp][j] += 2*2048; + x264_cost_mv_fpel[a->i_lambda][j] = x264_malloc( (4*2048 + 1) * sizeof(int16_t) ); + x264_cost_mv_fpel[a->i_lambda][j] += 2*2048; for( i = -2*2048; i < 2*2048; i++ ) - x264_cost_mv_fpel[a->i_qp][j][i] = p_cost_mv[a->i_qp][i*4+j]; + x264_cost_mv_fpel[a->i_lambda][j][i] = p_cost_mv[a->i_lambda][i*4+j]; } } } diff --git a/encoder/me.h b/encoder/me.h index 3d7a446c..3910f747 100644 --- a/encoder/me.h +++ b/encoder/me.h @@ -63,7 +63,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ); uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel ); -extern uint16_t *x264_cost_mv_fpel[52][4]; +extern uint16_t *x264_cost_mv_fpel[92][4]; #define COPY1_IF_LT(x,y)\ if((y)<(x))\