granicus.if.org Git - libx264/commitdiff
Various threading-related cosmetics
author Anton Mitrofanov <BugMaster@narod.ru>
Tue, 26 Jan 2010 19:41:18 +0000 (11:41 -0800)
committer Fiona Glaser <fiona@x264.com>
Sat, 30 Jan 2010 12:05:01 +0000 (04:05 -0800)
Simplify a lot of code and remove some unnecessary variables.

common/common.h
encoder/analyse.c
encoder/encoder.c
encoder/ratecontrol.c

index df39f261fb83d7aee50ae75da137d3a9c7406268..0f16e0a041fe83b8648c9703533d932a390fedac 100644 (file)
@@ -362,16 +362,12 @@ struct x264_t
 
     /* frame number/poc */
     int             i_frame;
+    int             i_frame_num;
 
-    int             i_frame_offset; /* decoding only */
-    int             i_frame_num;    /* decoding only */
-    int             i_poc_msb;      /* decoding only */
-    int             i_poc_lsb;      /* decoding only */
-    int             i_poc;          /* decoding only */
-
-    int             i_thread_num;   /* threads only */
-    int             i_nal_type;     /* threads only */
-    int             i_nal_ref_idc;  /* threads only */
+    int             i_thread_frames; /* Number of different frames being encoded by threads;
+                                      * 1 when sliced-threads is on. */
+    int             i_nal_type;
+    int             i_nal_ref_idc;
 
     /* We use only one SPS and one PPS */
     x264_sps_t      sps_array[1];
index 7a65354a7df3f99f48cc4107fed8d9cffce88806..ddd33f1a65b7ebe9c1f2b851ee8de5b5ae2c79ef 100644 (file)
@@ -413,7 +413,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
             int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
             int thread_mvy_range = i_fmv_range;
 
-            if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+            if( h->i_thread_frames > 1 )
             {
                 int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
                 int thresh = pix_y + h->param.analyse.i_mv_range_thread;
@@ -1167,7 +1167,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
         {
             h->mb.i_type = P_SKIP;
             x264_analyse_update_cache( h, a );
-            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
             return;
         }
 
@@ -1183,7 +1183,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
     }
 
     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
-    assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+    assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
 
     h->mb.i_type = P_L0;
     if( a->i_mbrd )
@@ -2403,7 +2403,7 @@ intra_analysis:
             /* Fast P_SKIP detection */
             if( h->param.analyse.b_fast_pskip )
             {
-                if( h->param.i_threads > 1 && !h->param.b_sliced_threads && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
+                if( h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
                     // FIXME don't need to check this if the reference frame is done
                     {}
                 else if( h->param.analyse.i_subpel_refine >= 3 )
@@ -2422,7 +2422,7 @@ intra_analysis:
         {
             h->mb.i_type = P_SKIP;
             h->mb.i_partition = D_16x16;
-            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
         }
         else
         {
@@ -3143,7 +3143,7 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
     }
 
 #ifndef NDEBUG
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads && !IS_INTRA(h->mb.i_type) )
+    if( h->i_thread_frames > 1 && !IS_INTRA(h->mb.i_type) )
     {
         int l;
         for( l=0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
index 13684ee4c84aa86f07b41cdb6617a0c6e3aac846..d69cf520f0ad14092f48b4441105b6537b513c1c 100644 (file)
@@ -421,6 +421,7 @@ static int x264_validate_parameters( x264_t *h )
     }
     else
         h->param.b_sliced_threads = 0;
+    h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;
 
     if( h->param.b_interlaced )
     {
@@ -589,8 +590,8 @@ static int x264_validate_parameters( x264_t *h )
         h->param.rc.i_lookahead = 0;
 #ifdef HAVE_PTHREAD
     if( h->param.i_sync_lookahead )
-        h->param.i_sync_lookahead = x264_clip3( h->param.i_sync_lookahead, h->param.i_threads + h->param.i_bframe, X264_LOOKAHEAD_MAX );
-    if( h->param.rc.b_stat_read || h->param.i_threads == 1 || h->param.b_sliced_threads )
+        h->param.i_sync_lookahead = x264_clip3( h->param.i_sync_lookahead, h->i_thread_frames + h->param.i_bframe, X264_LOOKAHEAD_MAX );
+    if( h->param.rc.b_stat_read || h->i_thread_frames == 1 )
         h->param.i_sync_lookahead = 0;
 #else
     h->param.i_sync_lookahead = 0;
@@ -708,7 +709,7 @@ static int x264_validate_parameters( x264_t *h )
     if( !h->param.analyse.i_weighted_pred && h->param.rc.b_mb_tree && h->param.analyse.b_psy && !h->param.b_interlaced )
         h->param.analyse.i_weighted_pred = X264_WEIGHTP_FAKE;
 
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         int r = h->param.analyse.i_mv_range_thread;
         int r2;
@@ -718,7 +719,7 @@ static int x264_validate_parameters( x264_t *h )
             // the rest is allocated to whichever thread is far enough ahead to use it.
             // reserving more space increases quality for some videos, but costs more time
             // in thread synchronization.
-            int max_range = (h->param.i_height + X264_THREAD_HEIGHT) / h->param.i_threads - X264_THREAD_HEIGHT;
+            int max_range = (h->param.i_height + X264_THREAD_HEIGHT) / h->i_thread_frames - X264_THREAD_HEIGHT;
             r = max_range / 2;
         }
         r = X264_MAX( r, h->param.analyse.i_me_range );
@@ -886,8 +887,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
     if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size )
         h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
     i_slicetype_length = h->frames.i_delay;
-    if( !h->param.b_sliced_threads )
-        h->frames.i_delay += h->param.i_threads - 1;
+    h->frames.i_delay += h->i_thread_frames - 1;
     h->frames.i_delay = X264_MIN( h->frames.i_delay, X264_LOOKAHEAD_MAX );
     h->frames.i_delay += h->param.i_sync_lookahead;
     h->frames.i_bframe_delay = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 2 : 1) : 0;
@@ -910,11 +910,11 @@ x264_t *x264_encoder_open( x264_param_t *param )
 
     CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
     /* Allocate room for max refs plus a few extra just in case. */
-    CHECKED_MALLOCZERO( h->frames.unused[1], (h->param.i_threads + 20) * sizeof(x264_frame_t *) );
+    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + 20) * sizeof(x264_frame_t *) );
     CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
-                        + h->param.i_threads + 3) * sizeof(x264_frame_t *) );
+                        + h->i_thread_frames + 3) * sizeof(x264_frame_t *) );
     if( h->param.analyse.i_weighted_pred > 0 )
-        CHECKED_MALLOCZERO( h->frames.blank_unused, h->param.i_threads * 4 * sizeof(x264_frame_t *) );
+        CHECKED_MALLOCZERO( h->frames.blank_unused, h->i_thread_frames * 4 * sizeof(x264_frame_t *) );
     h->i_ref0 = 0;
     h->i_ref1 = 0;
 
@@ -977,7 +977,6 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
 
     h->thread[0] = h;
-    h->i_thread_num = 0;
     for( i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
         CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
 
@@ -1501,7 +1500,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
         }
     }
 
-    if( h->param.i_threads > 1 && h->fdec->b_kept_as_ref && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
         x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );
 
     min_y = X264_MAX( min_y*16-8, 0 );
@@ -1537,7 +1536,7 @@ static inline int x264_reference_update( x264_t *h )
     int i, j;
     if( !h->fdec->b_kept_as_ref )
     {
-        if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+        if( h->i_thread_frames > 1 )
         {
             x264_frame_push_unused( h, h->fdec );
             h->fdec = x264_frame_pop_unused( h, 1 );
@@ -1982,11 +1981,15 @@ static int x264_threaded_slices_write( x264_t *h )
 
     /* dispatch */
     for( i = 0; i < h->param.i_threads; i++ )
+    {
         if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
             return -1;
+        h->thread[i]->b_thread_active = 1;
+    }
     for( i = 0; i < h->param.i_threads; i++ )
     {
         x264_pthread_join( h->thread[i]->thread_handle, &ret );
+        h->thread[i]->b_thread_active = 0;
         if( (intptr_t)ret )
             return (intptr_t)ret;
     }
@@ -2036,12 +2039,12 @@ int     x264_encoder_encode( x264_t *h,
     x264_t *thread_current, *thread_prev, *thread_oldest;
     int i_nal_type, i_nal_ref_idc, i_global_qp, i;
 
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         thread_prev    = h->thread[ h->i_thread_phase ];
-        h->i_thread_phase = (h->i_thread_phase + 1) % h->param.i_threads;
+        h->i_thread_phase = (h->i_thread_phase + 1) % h->i_thread_frames;
         thread_current = h->thread[ h->i_thread_phase ];
-        thread_oldest  = h->thread[ (h->i_thread_phase + 1) % h->param.i_threads ];
+        thread_oldest  = h->thread[ (h->i_thread_phase + 1) % h->i_thread_frames ];
         x264_thread_sync_context( thread_current, thread_prev );
         x264_thread_sync_ratecontrol( thread_current, thread_prev, thread_oldest );
         h = thread_current;
@@ -2100,7 +2103,7 @@ int     x264_encoder_encode( x264_t *h,
         /* 2: Place the frame into the queue for its slice type decision */
         x264_lookahead_put_frame( h, fenc );
 
-        if( h->frames.i_input <= h->frames.i_delay + (h->param.b_sliced_threads ? 0 : 1 - h->param.i_threads) )
+        if( h->frames.i_input <= h->frames.i_delay + 1 - h->i_thread_frames )
         {
             /* Nothing yet to encode, waiting for filling of buffers */
             pic_out->i_type = X264_TYPE_AUTO;
@@ -2327,7 +2330,7 @@ int     x264_encoder_encode( x264_t *h,
     /* Write frame */
     h->i_threadslice_start = 0;
     h->i_threadslice_end = h->sps->i_mb_height;
-    if( !h->param.b_sliced_threads && h->param.i_threads > 1 )
+    if( h->i_thread_frames > 1 )
     {
         if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
             return -1;
@@ -2356,9 +2359,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
     {
         void *ret = NULL;
         x264_pthread_join( h->thread_handle, &ret );
+        h->b_thread_active = 0;
         if( (intptr_t)ret )
             return (intptr_t)ret;
-        h->b_thread_active = 0;
     }
     if( !h->out.i_nal )
     {
@@ -2564,25 +2567,28 @@ void    x264_encoder_close  ( x264_t *h )
 
     x264_lookahead_delete( h );
 
-    for( i = 0; i < h->param.i_threads; i++ )
+    if( h->param.i_threads > 1 )
     {
         // don't strictly have to wait for the other threads, but it's simpler than canceling them
-        if( h->thread[i]->b_thread_active )
+        for( i = 0; i < h->param.i_threads; i++ )
+            if( h->thread[i]->b_thread_active )
+                x264_pthread_join( h->thread[i]->thread_handle, NULL );
+        if( h->i_thread_frames > 1 )
         {
-            x264_pthread_join( h->thread[i]->thread_handle, NULL );
-            assert( h->thread[i]->fenc->i_reference_count == 1 );
-            x264_frame_delete( h->thread[i]->fenc );
-        }
-    }
-
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
-    {
-        x264_t *thread_prev;
+            for( i = 0; i < h->i_thread_frames; i++ )
+            {
+                if( h->thread[i]->b_thread_active )
+                {
+                    assert( h->thread[i]->fenc->i_reference_count == 1 );
+                    x264_frame_delete( h->thread[i]->fenc );
+                }
+            }
 
-        thread_prev = h->thread[h->i_thread_phase];
-        x264_thread_sync_ratecontrol( h, thread_prev, h );
-        x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
-        h->i_frame = thread_prev->i_frame + 1 - h->param.i_threads;
+            x264_t *thread_prev = h->thread[h->i_thread_phase];
+            x264_thread_sync_ratecontrol( h, thread_prev, h );
+            x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
+            h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
+        }
     }
     h->i_frame++;
 
@@ -2833,7 +2839,7 @@ void    x264_encoder_close  ( x264_t *h )
     x264_free( h->nal_buffer );
     x264_analyse_free_costs( h );
 
-    if( h->param.i_threads > 1)
+    if( h->i_thread_frames > 1)
         h = h->thread[h->i_thread_phase];
 
     /* frames */
@@ -2878,9 +2884,12 @@ int x264_encoder_delayed_frames( x264_t *h )
 {
     int delayed_frames = 0;
     int i;
-    for( i=0; i<h->param.i_threads; i++ )
-        delayed_frames += h->thread[i]->b_thread_active;
-    h = h->thread[h->i_thread_phase];
+    if( h->i_thread_frames > 1 )
+    {
+        for( i=0; i<h->i_thread_frames; i++ )
+            delayed_frames += h->thread[i]->b_thread_active;
+        h = h->thread[h->i_thread_phase];
+    }
     for( i=0; h->frames.current[i]; i++ )
         delayed_frames++;
     x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
index 5044329e824e822f90ba58a0a3ccd4663428fc38..f1a431523df585bfead154a37240f4f9ad23c8cc 100644 (file)
@@ -1578,13 +1578,13 @@ static void update_vbv_plan( x264_t *h, int overhead )
 {
     x264_ratecontrol_t *rcc = h->rc;
     rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final - overhead;
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         int j = h->rc - h->thread[0]->rc;
         int i;
-        for( i=1; i<h->param.i_threads; i++ )
+        for( i=1; i<h->i_thread_frames; i++ )
         {
-            x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
+            x264_t *t = h->thread[ (j+i)%h->i_thread_frames ];
             double bits = t->rc->frame_size_planned;
             if( !t->b_thread_active )
                 continue;
@@ -1794,7 +1794,7 @@ static float rate_estimate_qscale( x264_t *h )
     }
     else
     {
-        double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate * (h->param.b_sliced_threads?1:h->param.i_threads);
+        double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate * h->i_thread_frames;
 
         if( rcc->b_2pass )
         {
@@ -1804,13 +1804,13 @@ static float rate_estimate_qscale( x264_t *h )
 
             if( rcc->b_vbv )
             {
-                if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+                if( h->i_thread_frames > 1 )
                 {
                     int j = h->rc - h->thread[0]->rc;
                     int i;
-                    for( i=1; i<h->param.i_threads; i++ )
+                    for( i=1; i<h->i_thread_frames; i++ )
                     {
-                        x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
+                        x264_t *t = h->thread[ (j+i)%h->i_thread_frames ];
                         double bits = t->rc->frame_size_planned;
                         if( !t->b_thread_active )
                             continue;
@@ -1821,16 +1821,16 @@ static float rate_estimate_qscale( x264_t *h )
             }
             else
             {
-                if( h->fenc->i_frame < h->param.i_threads )
+                if( h->fenc->i_frame < h->i_thread_frames )
                     predicted_bits += (int64_t)h->fenc->i_frame * rcc->bitrate / rcc->fps;
                 else
-                    predicted_bits += (int64_t)(h->param.i_threads - 1) * rcc->bitrate / rcc->fps;
+                    predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
             }
 
             diff = predicted_bits - (int64_t)rce.expected_bits;
             q = rce.new_qscale;
             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
-            if( ((h->fenc->i_frame + 1 - h->param.i_threads) >= rcc->fps) &&
+            if( ((h->fenc->i_frame + 1 - h->i_thread_frames) >= rcc->fps) &&
                 (rcc->expected_bits_sum > 0))
             {
                 /* Adjust quant based on the difference between
@@ -1897,7 +1897,7 @@ static float rate_estimate_qscale( x264_t *h )
             }
             else
             {
-                int i_frame_done = h->fenc->i_frame + 1 - h->param.i_threads;
+                int i_frame_done = h->fenc->i_frame + 1 - h->i_thread_frames;
 
                 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );