Saves around 100 million clock cycles on x264 init.
static uint16_t x264_cost_ref[QP_MAX+1][3][33];
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
-int x264_analyse_init_costs( x264_t *h, int qp )
+float *x264_analyse_prepare_costs( x264_t *h ) /* Builds a float table with logs[i] = 2*log2f(i+1) + 0.718f + (i != 0) for i in 0..2*4*2048; returns NULL on allocation failure. h is not referenced in the body. */
+{
+ float *logs = x264_malloc( (2*4*2048+1)*sizeof(float) ); /* one entry per index 0..2*4*2048 inclusive */
+ if( !logs )
+ return NULL; /* allocation failed; nothing to clean up */
+ logs[0] = 0.718f; /* index 0: log2f(1) is 0 and the extra +1 applies only to nonzero indices */
+ for( int i = 1; i <= 2*4*2048; i++ )
+ logs[i] = log2f(i+1)*2 + 1.718f; /* 1.718f = 0.718f plus the +1 added for every nonzero index */
+ return logs; /* heap buffer handed to the caller, which must release it (the visible call site uses x264_free) */
+}
+
+int x264_analyse_init_costs( x264_t *h, float *logs, int qp )
{
int lambda = x264_lambda_tab[qp];
if( h->cost_mv[qp] )
for( int i = 0; i <= 2*4*2048; i++ )
{
h->cost_mv[qp][-i] =
- h->cost_mv[qp][i] = X264_MIN( lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f, (1<<16)-1 );
+ h->cost_mv[qp][i] = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
}
x264_pthread_mutex_lock( &cost_ref_mutex );
for( int i = 0; i < 3; i++ )
#ifndef X264_ANALYSE_H
#define X264_ANALYSE_H
-int x264_analyse_init_costs( x264_t *h, int qp );
+float *x264_analyse_prepare_costs( x264_t *h );
+int x264_analyse_init_costs( x264_t *h, float *logs, int qp );
void x264_analyse_free_costs( x264_t *h );
void x264_analyse_weight_frame( x264_t *h, int end );
void x264_macroblock_analyse( x264_t *h );
p += sprintf( p, " none!" );
x264_log( h, X264_LOG_INFO, "%s\n", buf );
+ float *logs = x264_analyse_prepare_costs( h );
+ if( !logs )
+ goto fail;
for( qp = X264_MIN( h->param.rc.i_qp_min, QP_MAX_SPEC ); qp <= h->param.rc.i_qp_max; qp++ )
- if( x264_analyse_init_costs( h, qp ) )
+ if( x264_analyse_init_costs( h, logs, qp ) )
goto fail;
- if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
+ if( x264_analyse_init_costs( h, logs, X264_LOOKAHEAD_QP ) )
goto fail;
+ x264_free( logs );
static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
/* Checks for known miscompilation issues. */