From a155572ed547a3627ef00ca70ab804ff452147cd Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Fri, 24 Feb 2012 13:34:39 -0800 Subject: [PATCH] Sliced-threads: do hpel and deblock after returning Lowers encoding latency around 14% in sliced threads mode with preset superfast. Additionally, even if there is no waiting time between frames, this improves parallelism, because hpel+deblock are done during the (singlethreaded) lookahead. For ease of debugging, dump-yuv forces all of the threads to wait and finish instead of setting b_full_recon. --- common/common.h | 7 ++ common/deblock.c | 4 +- common/frame.c | 43 ++++++++---- common/frame.h | 5 +- common/macroblock.c | 19 +++++- encoder/encoder.c | 156 ++++++++++++++++++++++++++++++++------------ 6 files changed, 176 insertions(+), 58 deletions(-) diff --git a/common/common.h b/common/common.h index 835fde52..fab453e3 100644 --- a/common/common.h +++ b/common/common.h @@ -470,9 +470,13 @@ struct x264_t x264_t *thread[X264_THREAD_MAX+1]; int b_thread_active; int i_thread_phase; /* which thread to use for the next frame */ + int i_thread_idx; /* which thread this is */ int i_threadslice_start; /* first row in this thread slice */ int i_threadslice_end; /* row after the end of this thread slice */ + int i_threadslice_pass; /* which pass of encoding we are on */ x264_threadpool_t *threadpool; + x264_pthread_mutex_t mutex; + x264_pthread_cond_t cv; /* bitstream output */ struct @@ -823,6 +827,9 @@ struct x264_t /* extra data required for mbaff in mv prediction */ int16_t topright_mv[2][3][2]; int8_t topright_ref[2][3]; + + /* current mb deblock strength */ + uint8_t (*deblock_strength)[8][4]; } cache; /* */ diff --git a/common/deblock.c b/common/deblock.c index 51f0d7a8..bab9e5d9 100644 --- a/common/deblock.c +++ b/common/deblock.c @@ -395,7 +395,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) int mb_xy = h->mb.i_mb_xy; int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy]; int intra_cur = IS_INTRA( h->mb.type[mb_xy] ); - uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][mb_x]; + uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][h->param.b_sliced_threads?mb_xy:mb_x]; pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x; pixel *pixuv = h->fdec->plane[1] + chroma_height*mb_y*strideuv + 16*mb_x; @@ -592,7 +592,7 @@ void x264_macroblock_deblock( x264_t *h ) if( (h->mb.i_partition == D_16x16 && !h->mb.i_cbp_luma && !intra_cur) || qp <= qp_thresh ) return; - uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x]; + uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength; if( intra_cur ) { memset( &bs[0][1], 3, 3*4*sizeof(uint8_t) ); diff --git a/common/frame.c b/common/frame.c index 21d13476..8a174062 100644 --- a/common/frame.c +++ b/common/frame.c @@ -480,9 +480,12 @@ static void ALWAYS_INLINE plane_expand_border( pixel *pix, int i_stride, int i_w #undef PPIXEL } -void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end ) +void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y ) { - int b_start = !mb_y; + int pad_top = mb_y == 0; + int pad_bot = mb_y == h->mb.i_mb_height - (1 << SLICE_MBAFF); + int b_start = mb_y == h->i_threadslice_start; + int b_end = mb_y == h->i_threadslice_end - (1 << SLICE_MBAFF); if( mb_y & SLICE_MBAFF ) return; for( int i = 0; i < frame->i_plane; i++ ) @@ -491,30 +494,31 @@ void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_e int v_shift = i && CHROMA_V_SHIFT; int stride = frame->i_stride[i]; int width = 16*h->mb.i_mb_width; - int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> v_shift; + int height = (pad_bot ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> v_shift; int padh = PADH; int padv = PADV >> v_shift; // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb if( b_end && !b_start ) height += 4 >> (v_shift + SLICE_MBAFF); pixel *pix; + int starty = 16*mb_y - 4*!b_start; if( SLICE_MBAFF ) { // border samples for each field are extended separately - pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> v_shift); - plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, h_shift ); - plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, h_shift ); + pix = frame->plane_fld[i] + (starty*stride >> v_shift); + plane_expand_border( pix, stride*2, width, height, padh, padv, pad_top, pad_bot, h_shift ); + plane_expand_border( pix+stride, stride*2, width, height, padh, padv, pad_top, pad_bot, h_shift ); - height = (b_end ? 16*(h->mb.i_mb_height - mb_y) : 32) >> v_shift; + height = (pad_bot ? 16*(h->mb.i_mb_height - mb_y) : 32) >> v_shift; if( b_end && !b_start ) height += 4 >> v_shift; - pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> v_shift); - plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, h_shift ); + pix = frame->plane[i] + (starty*stride >> v_shift); + plane_expand_border( pix, stride, width, height, padh, padv, pad_top, pad_bot, h_shift ); } else { - pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> v_shift); - plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, h_shift ); + pix = frame->plane[i] + (starty*stride >> v_shift); + plane_expand_border( pix, stride, width, height, padh, padv, pad_top, pad_bot, h_shift ); } } } @@ -619,6 +623,23 @@ void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ) x264_pthread_mutex_unlock( &frame->mutex ); } +void x264_threadslice_cond_broadcast( x264_t *h, int pass ) +{ + x264_pthread_mutex_lock( &h->mutex ); + h->i_threadslice_pass = pass; + if( pass > 0 ) + x264_pthread_cond_broadcast( &h->cv ); + x264_pthread_mutex_unlock( &h->mutex ); +} + +void x264_threadslice_cond_wait( x264_t *h, int pass ) +{ + x264_pthread_mutex_lock( &h->mutex ); + while( h->i_threadslice_pass < pass ) + x264_pthread_cond_wait( &h->cv, &h->mutex ); + x264_pthread_mutex_unlock( &h->mutex ); +} + /* list operators */ void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ) diff --git a/common/frame.h b/common/frame.h index 54415f7f..31f0a3f1 100644 --- a/common/frame.h +++ b/common/frame.h @@ -207,7 +207,7 @@ void x264_frame_delete( x264_frame_t *frame ); int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src ); -void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end ); +void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y ); void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end ); void x264_frame_expand_border_lowres( x264_frame_t *frame ); void x264_frame_expand_border_chroma( x264_t *h, x264_frame_t *frame, int plane ); @@ -225,6 +225,9 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mba void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ); void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); +void x264_threadslice_cond_broadcast( x264_t *h, int pass ); +void x264_threadslice_cond_wait( x264_t *h, int pass ); + void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ); x264_frame_t *x264_frame_pop( x264_frame_t **list ); void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame ); diff --git a/common/macroblock.c b/common/macroblock.c index 6bb0566e..8216799c 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -368,7 +368,17 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead ) } for( int i = 0; i <= PARAM_INTERLACED; i++ ) { - CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width ); + if( h->param.b_sliced_threads ) + { + /* Only allocate the first one, and allocate it for the whole frame, because we + * won't be deblocking until after the frame is fully encoded. */ + if( h == h->thread[0] && !i ) + CHECKED_MALLOC( h->deblock_strength[0], sizeof(**h->deblock_strength) * h->mb.i_mb_count ); + else + h->deblock_strength[i] = h->thread[0]->deblock_strength[0]; + } + else + CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width ); h->deblock_strength[1] = h->deblock_strength[i]; } } @@ -401,7 +411,8 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead ) if( !b_lookahead ) { for( int i = 0; i <= PARAM_INTERLACED; i++ ) - x264_free( h->deblock_strength[i] ); + if( !h->param.b_sliced_threads || (h == h->thread[0] && !i) ) + x264_free( h->deblock_strength[i] ); for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ ) for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ ) x264_free( h->intra_border_backup[i][j] - 16 ); @@ -858,6 +869,8 @@ static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int m const x264_left_table_t *left_index_table = h->mb.left_index_table; + h->mb.cache.deblock_strength = h->deblock_strength[mb_y&1][h->param.b_sliced_threads?h->mb.i_mb_xy:mb_x]; + /* load cache */ if( h->mb.i_neighbour & MB_TOP ) { @@ -1432,7 +1445,7 @@ static void x264_macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][ void x264_macroblock_deblock_strength( x264_t *h ) { - uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x]; + uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength; if( IS_INTRA( h->mb.i_type ) ) { memset( bs[0][1], 3, 3*4*sizeof(uint8_t) ); diff --git a/encoder/encoder.c b/encoder/encoder.c index 85260d22..03d05aed 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -68,12 +68,28 @@ static double x264_ssim( double ssim ) return -10.0 * log10( inv_ssim ); } +static int x264_threadpool_wait_all( x264_t *h ) +{ + for( int i = 0; i < h->param.i_threads; i++ ) + if( h->thread[i]->b_thread_active ) + { + h->thread[i]->b_thread_active = 0; + if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) < 0 ) + return -1; + } + return 0; +} + static void x264_frame_dump( x264_t *h ) { FILE *f = fopen( h->param.psz_dump_yuv, "r+b" ); if( !f ) return; + /* Wait for the threads to finish deblocking */ + if( h->param.b_sliced_threads ) + x264_threadpool_wait_all( h ); + /* Write the frame in display order */ int frame_size = FRAME_SIZE( h->param.i_height * h->param.i_width * sizeof(pixel) ); fseek( f, (uint64_t)h->fdec->i_frame * frame_size, SEEK_SET ); @@ -921,9 +937,6 @@ static int x264_validate_parameters( x264_t *h, int b_open ) h->param.i_nal_hrd = X264_NAL_HRD_VBR; } - if( h->param.psz_dump_yuv ) - h->param.b_full_recon = 1; - /* ensure the booleans are 0 or 1 so they can be used in math */ #define BOOLIFY(x) h->param.x = !!h->param.x BOOLIFY( b_cabac ); @@ -1258,8 +1271,18 @@ x264_t *x264_encoder_open( x264_param_t *param ) goto fail; h->thread[0] = h; - for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ ) - CHECKED_MALLOC( h->thread[i], sizeof(x264_t) ); + for( int i = 0; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ ) + { + if( i ) + CHECKED_MALLOC( h->thread[i], sizeof(x264_t) ); + if( i < h->param.i_threads ) + { + if( x264_pthread_mutex_init( &h->thread[i]->mutex, NULL ) ) + goto fail; + if( x264_pthread_cond_init( &h->thread[i]->cv, NULL ) ) + goto fail; + } + } for( int i = 0; i < h->param.i_threads; i++ ) { @@ -1354,6 +1377,11 @@ fail: ****************************************************************************/ int x264_encoder_reconfig( x264_t *h, x264_param_t *param ) { + /* If the previous frame isn't done encoding, reconfiguring is probably dangerous. */ + if( h->param.b_sliced_threads ) + if( x264_threadpool_wait_all( h ) < 0 ) + return -1; + int rc_reconfig = 0; h = h->thread[h->thread[0]->i_thread_phase]; x264_set_aspect_ratio( h, param, 0 ); @@ -1830,7 +1858,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc ) h->mb.pic.i_fref[1] = h->i_ref[1]; } -static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop ) +static void x264_fdec_filter_row( x264_t *h, int mb_y, int pass ) { /* mb_y is the mb to be encoded next, not the mb to be filtered here */ int b_hpel = h->fdec->b_kept_as_ref; @@ -1843,11 +1871,30 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop ) * above each MB, as bS=4 doesn't happen for the top of interlaced mbpairs. */ int minpix_y = min_y*16 - 4 * !b_start; int maxpix_y = mb_y*16 - 4 * !b_end; - b_deblock &= b_hpel || h->param.b_full_recon; - if( h->param.b_sliced_threads && b_start && min_y && !b_inloop ) + b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv; + if( h->param.b_sliced_threads ) { - b_deblock = 0; /* We already deblocked on the inloop pass. */ - b_measure_quality = 0; /* We already measured quality on the inloop pass. */ + switch( pass ) + { + /* During encode: only do deblock if asked for */ + default: + case 0: + b_deblock &= h->param.b_full_recon; + b_hpel = 0; + break; + /* During post-encode pass: do deblock if not done yet, do hpel for all + * rows except those between slices. */ + case 1: + b_deblock &= !h->param.b_full_recon; + b_hpel &= !(b_start && min_y > 0); + b_measure_quality = 0; + break; + /* Final pass: do the rows between slices in sequence. */ + case 2: + b_deblock = 0; + b_measure_quality = 0; + break; + } } if( mb_y & SLICE_MBAFF ) return; @@ -1861,17 +1908,19 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop ) /* FIXME: Prediction requires different borders for interlaced/progressive mc, * but the actual image data is equivalent. For now, maintain this * consistency by copying deblocked pixels between planes. */ - if( PARAM_INTERLACED ) + if( PARAM_INTERLACED && (!h->param.b_sliced_threads || pass == 1) ) for( int p = 0; p < h->fdec->i_plane; p++ ) for( int i = minpix_y>>(CHROMA_V_SHIFT && p); i < maxpix_y>>(CHROMA_V_SHIFT && p); i++ ) memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p], h->fdec->plane[p] + i*h->fdec->i_stride[p], h->mb.i_mb_width*16*sizeof(pixel) ); + if( h->fdec->b_kept_as_ref && (!h->param.b_sliced_threads || pass == 1) ) + x264_frame_expand_border( h, h->fdec, min_y ); if( b_hpel ) { int end = mb_y == h->mb.i_mb_height; - x264_frame_expand_border( h, h->fdec, min_y, end ); + /* Can't do hpel until the previous slice is done encoding. */ if( h->param.analyse.i_subpel_refine ) { x264_frame_filter( h, h->fdec, min_y, end ); @@ -1879,7 +1928,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop ) } } - if( SLICE_MBAFF ) + if( SLICE_MBAFF && pass == 0 ) for( int i = 0; i < 3; i++ ) { XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] ); @@ -2148,7 +2197,7 @@ static int x264_slice_write( x264_t *h ) int orig_last_mb = h->sh.i_last_mb; uint8_t *last_emu_check; x264_bs_bak_t bs_bak[2]; - b_deblock &= b_hpel || h->param.b_full_recon; + b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv; bs_realign( &h->out.bs ); /* Slice */ @@ -2200,7 +2249,7 @@ static int x264_slice_write( x264_t *h ) if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size ) x264_bitstream_backup( h, &bs_bak[1], i_skip, 1 ); if( !h->mb.b_reencode_mb ) - x264_fdec_filter_row( h, i_mb_y, 1 ); + x264_fdec_filter_row( h, i_mb_y, 0 ); } if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream ) @@ -2447,7 +2496,23 @@ reencode: + (h->out.i_nal*NALU_OVERHEAD * 8) - h->stat.frame.i_tex_bits - h->stat.frame.i_mv_bits; - x264_fdec_filter_row( h, h->i_threadslice_end, 1 ); + x264_fdec_filter_row( h, h->i_threadslice_end, 0 ); + + if( h->param.b_sliced_threads ) + { + /* Tell the main thread we're done. */ + x264_threadslice_cond_broadcast( h, 1 ); + /* Do hpel now */ + for( int mb_y = h->i_threadslice_start; mb_y <= h->i_threadslice_end; mb_y++ ) + x264_fdec_filter_row( h, mb_y, 1 ); + x264_threadslice_cond_broadcast( h, 2 ); + /* Do the first row of hpel, now that the previous slice is done */ + if( h->i_thread_idx > 0 ) + { + x264_threadslice_cond_wait( h->thread[h->i_thread_idx-1], 2 ); + x264_fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 ); + } + } } return 0; @@ -2488,7 +2553,7 @@ static void *x264_slices_write( x264_t *h ) #if HAVE_VISUALIZE if( h->param.b_visualize ) if( x264_visualize_init( h ) ) - return (void *)-1; + goto fail; #endif /* init stats */ @@ -2521,7 +2586,7 @@ static void *x264_slices_write( x264_t *h ) } h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb ); if( x264_stack_align( x264_slice_write, h ) ) - return (void *)-1; + goto fail; h->sh.i_first_mb = h->sh.i_last_mb + 1; // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order if( SLICE_MBAFF && h->sh.i_first_mb % h->mb.i_mb_width ) @@ -2537,6 +2602,12 @@ static void *x264_slices_write( x264_t *h ) #endif return (void *)0; + +fail: + /* Tell other threads we're done, so they wouldn't wait for it */ + if( h->param.b_sliced_threads ) + x264_threadslice_cond_broadcast( h, 2 ); + return (void *)-1; } static int x264_threaded_slices_write( x264_t *h ) @@ -2561,26 +2632,19 @@ static int x264_threaded_slices_write( x264_t *h ) x264_threads_distribute_ratecontrol( h ); - /* dispatch */ + /* setup */ for( int i = 0; i < h->param.i_threads; i++ ) { - x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] ); + h->thread[i]->i_thread_idx = i; h->thread[i]->b_thread_active = 1; + x264_threadslice_cond_broadcast( h->thread[i], 0 ); } + /* dispatch */ for( int i = 0; i < h->param.i_threads; i++ ) - { - h->thread[i]->b_thread_active = 0; - if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) ) - return -1; - } - - /* Go back and fix up the hpel on the borders between slices. */ - for( int i = 1; i < h->param.i_threads; i++ ) - { - x264_fdec_filter_row( h->thread[i], h->thread[i]->i_threadslice_start + 1, 0 ); - if( SLICE_MBAFF ) - x264_fdec_filter_row( h->thread[i], h->thread[i]->i_threadslice_start + 2, 0 ); - } + x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] ); + /* wait */ + for( int i = 0; i < h->param.i_threads; i++ ) + x264_threadslice_cond_wait( h->thread[i], 1 ); x264_threads_merge_ratecontrol( h ); @@ -2677,11 +2741,6 @@ int x264_encoder_encode( x264_t *h, x264_cpu_mask_misalign_sse(); #endif - // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0 - if( x264_reference_update( h ) ) - return -1; - h->fdec->i_lines_completed = -1; - /* no data out */ *pi_nal = 0; *pp_nal = NULL; @@ -2777,6 +2836,12 @@ int x264_encoder_encode( x264_t *h, /* ------------------- Get frame to be encoded ------------------------- */ /* 4: get picture to encode */ h->fenc = x264_frame_shift( h->frames.current ); + + /* If applicable, wait for previous frame reconstruction to finish */ + if( h->param.b_sliced_threads ) + if( x264_threadpool_wait_all( h ) < 0 ) + return -1; + if( h->i_frame == h->i_thread_frames - 1 ) h->i_reordered_pts_delay = h->fenc->i_reordered_pts; if( h->fenc->param ) @@ -2786,6 +2851,11 @@ int x264_encoder_encode( x264_t *h, h->fenc->param->param_free( h->fenc->param ); } + // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0 + if( x264_reference_update( h ) ) + return -1; + h->fdec->i_lines_completed = -1; + if( !IS_X264_TYPE_I( h->fenc->i_type ) ) { int valid_refs_left = 0; @@ -3117,7 +3187,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, { char psz_message[80]; - if( h->b_thread_active ) + if( !h->param.b_sliced_threads && h->b_thread_active ) { h->b_thread_active = 0; if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) ) @@ -3381,6 +3451,8 @@ void x264_encoder_close ( x264_t *h ) x264_lookahead_delete( h ); + if( h->param.b_sliced_threads ) + x264_threadpool_wait_all( h ); if( h->param.i_threads > 1 ) x264_threadpool_delete( h->threadpool ); if( h->i_thread_frames > 1 ) @@ -3675,7 +3747,7 @@ void x264_encoder_close ( x264_t *h ) x264_free( h->nal_buffer ); x264_analyse_free_costs( h ); - if( h->i_thread_frames > 1) + if( h->i_thread_frames > 1 ) h = h->thread[h->i_thread_phase]; /* frames */ @@ -3717,7 +3789,9 @@ void x264_encoder_close ( x264_t *h ) } x264_macroblock_thread_free( h->thread[i], 0 ); x264_free( h->thread[i]->out.p_bitstream ); - x264_free( h->thread[i]->out.nal); + x264_free( h->thread[i]->out.nal ); + x264_pthread_mutex_destroy( &h->thread[i]->mutex ); + x264_pthread_cond_destroy( &h->thread[i]->cv ); x264_free( h->thread[i] ); } } -- 2.40.0