From 78071b3b97e0fd227d94bf8046c21bcd3ee7c433 Mon Sep 17 00:00:00 2001 From: Attila Nagy Date: Fri, 16 Sep 2011 13:54:06 +0300 Subject: [PATCH] Multithreaded encoder, late sync loopfilter Second shot at this... Sync with loopfilter thread as late as possible, usually just at the beginning of next frame encoding. This returns control to application faster and allows better multicore scaling. When PSNR packets are generated the final filtered frame is needed immediately so we cannot delay the sync. The same has to be done when the internal frame is previewed. Change-Id: I64e110c8b224dd967faefffd9c93dd8dbad4a5b5 --- vp8/encoder/ethreading.c | 9 +++++---- vp8/encoder/onyx_if.c | 28 ++++++++++++++++++++++++---- vp8/encoder/onyx_int.h | 1 + 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 7740e5db7..2874e7845 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -24,9 +24,9 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip extern void vp8_build_block_offsets(MACROBLOCK *x); extern void vp8_setup_block_ptrs(MACROBLOCK *x); -extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); +extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); -static THREAD_FUNCTION loopfilter_thread(void *p_data) +static THREAD_FUNCTION thread_loopfilter(void *p_data) { VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); VP8_COMMON *cm = &cpi->common; @@ -41,7 +41,7 @@ static THREAD_FUNCTION loopfilter_thread(void *p_data) if (cpi->b_multi_threaded == 0) // we're shutting down break; - loopfilter_frame(cpi, cm); + vp8_loopfilter_frame(cpi, cm); sem_post(&cpi->h_event_end_lpf); } @@ -468,6 +468,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) cpi->b_multi_threaded = 0; cpi->encoding_thread_count = 0; + cpi->b_lpf_running = 0; if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) { @@ -526,7 +527,7 @@ void 
vp8cx_create_encoder_threads(VP8_COMP *cpi) sem_init(&cpi->h_event_end_lpf, 0, 0); lpfthd->ptr1 = (void *)cpi; - pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd); + pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd); } } diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 57656bb4f..1082245d3 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -3086,7 +3086,7 @@ void update_reference_frames(VP8_COMMON *cm) } } -void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) +void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) { if (cm->no_lpf) { @@ -3716,6 +3716,15 @@ static void encode_frame_to_data_rate vp8_setup_key_frame(cpi); } +#if CONFIG_MULTITHREAD + /* wait for the last picture loopfilter thread done */ + if (cpi->b_lpf_running) + { + sem_wait(&cpi->h_event_end_lpf); + cpi->b_lpf_running = 0; + } +#endif + // transform / motion compensation build reconstruction frame vp8_encode_frame(cpi); @@ -4074,11 +4083,12 @@ static void encode_frame_to_data_rate if (cpi->b_multi_threaded) { sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */ + cpi->b_lpf_running = 1; } else #endif { - loopfilter_frame(cpi, cm); + vp8_loopfilter_frame(cpi, cm); } update_reference_frames(cm); @@ -4098,10 +4108,11 @@ static void encode_frame_to_data_rate vp8_pack_bitstream(cpi, dest, dest_end, size); #if CONFIG_MULTITHREAD - /* wait for loopfilter thread done */ - if (cpi->b_multi_threaded) + /* if PSNR packets are generated we have to wait for the lpf */ + if (cpi->b_lpf_running && cpi->b_calculate_psnr) { sem_wait(&cpi->h_event_end_lpf); + cpi->b_lpf_running = 0; } #endif @@ -5107,6 +5118,15 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla else { int ret; + +#if CONFIG_MULTITHREAD + if(cpi->b_lpf_running) + { + sem_wait(&cpi->h_event_end_lpf); + cpi->b_lpf_running = 0; + } +#endif + #if CONFIG_POSTPROC ret = vp8_post_proc_frame(&cpi->common, dest, flags); #else diff 
--git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 6920fc316..4b965cf30 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -526,6 +526,7 @@ typedef struct VP8_COMP int mt_sync_range; int b_multi_threaded; int encoding_thread_count; + int b_lpf_running; pthread_t *h_encoding_thread; pthread_t h_filter_thread; -- 2.40.0