From 3ae24657887ed0a9f2e547973640890e9dfa6ea4 Mon Sep 17 00:00:00 2001 From: Attila Nagy Date: Fri, 25 Feb 2011 13:42:05 +0200 Subject: [PATCH] Encoder loopfilter running in its own thread In multithreaded mode the loopfilter is running in its own thread (filter level calculation and frame filtering). Filtering is mostly done in parallel with the bitstream packing. Before starting the packing the loopfilter level has to be calculated. Also any needed reference frame copying is done in the filter thread. Currently the encoder will create n+1 threads, where n > 1 is the number of threads specified by application and 1 is the extra filter thread. With n = 1 the encoder runs in single thread mode. There will never be more than n threads running concurrently. Change-Id: I4fb29b559a40275d6d3babb8727245c40fba931b --- vp8/encoder/ethreading.c | 43 ++++++++++ vp8/encoder/onyx_if.c | 178 +++++++++++++++++++++++---------------- vp8/encoder/onyx_int.h | 5 ++ 3 files changed, 155 insertions(+), 71 deletions(-) diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 12d5f66d3..5c607a0cb 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -24,6 +24,35 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); extern void vp8_build_block_offsets(MACROBLOCK *x); extern void vp8_setup_block_ptrs(MACROBLOCK *x); +#if CONFIG_MULTITHREAD + +extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); + +static THREAD_FUNCTION loopfilter_thread(void *p_data) +{ + VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); + VP8_COMMON *cm = &cpi->common; + + while (1) + { + if (cpi->b_multi_threaded == 0) + break; + + if (sem_wait(&cpi->h_event_start_lpf) == 0) + { + if (cpi->b_multi_threaded == FALSE) // we're shutting down + break; + + loopfilter_frame(cpi, cm); + + sem_post(&cpi->h_event_end_lpf); + } + } + + return 0; +} +#endif + static THREAD_FUNCTION thread_encoding_proc(void *p_data) { @@ -479,6 +508,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); } + { + LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; + + sem_init(&cpi->h_event_start_lpf, 0, 0); + sem_init(&cpi->h_event_end_lpf, 0, 0); + + lpfthd->ptr1 = (void *)cpi; + pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd); + } } } @@ -500,9 +538,14 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) sem_destroy(&cpi->h_event_start_encoding[i]); } + + sem_post(&cpi->h_event_start_lpf); + pthread_join(cpi->h_filter_thread, 0); } sem_destroy(&cpi->h_event_end_encoding); + sem_destroy(&cpi->h_event_end_lpf); + sem_destroy(&cpi->h_event_start_lpf); //free thread related resources vpx_free(cpi->h_event_start_encoding); diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 5dc579d10..fcd996d1c 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -3509,6 +3509,89 @@ static BOOL recode_loop_test( VP8_COMP *cpi, return force_recode; } +void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) +{ + if (cm->no_lpf) + { + cm->filter_level = 0; + } + else + { + struct vpx_usec_timer timer; + + vp8_clear_system_state(); + + vpx_usec_timer_start(&timer); + if (cpi->sf.auto_filter == 0) + vp8cx_pick_filter_level_fast(cpi->Source, cpi); + + else + vp8cx_pick_filter_level(cpi->Source, cpi); + + vpx_usec_timer_mark(&timer); + cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); + } + +#if CONFIG_MULTITHREAD + sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */ +#endif + + if (cm->filter_level > 0) + { + vp8cx_set_alt_lf_level(cpi, cm->filter_level); + vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level); + cm->last_filter_type = cm->filter_type; + cm->last_sharpness_level = cm->sharpness_level; + } + + vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); + + { + YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx]; + YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; + YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx]; + YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx]; + // At this point the new frame has been encoded. + // If any buffer copy / swapping is signaled it should be done here. + if (cm->frame_type == KEY_FRAME) + { + vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12); + vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12); + } + else // For non key frames + { + // Code to copy between reference buffers + if (cm->copy_buffer_to_arf) + { + if (cm->copy_buffer_to_arf == 1) + { + if (cm->refresh_last_frame) + // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. + vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12); + else + vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12); + } + else if (cm->copy_buffer_to_arf == 2) + vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12); + } + + if (cm->copy_buffer_to_gf) + { + if (cm->copy_buffer_to_gf == 1) + { + if (cm->refresh_last_frame) + // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. + vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12); + else + vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12); + } + else if (cm->copy_buffer_to_gf == 2) + vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12); + } + } + } +} + static void encode_frame_to_data_rate ( VP8_COMP *cpi, @@ -4058,8 +4141,8 @@ static void encode_frame_to_data_rate vp8_setup_key_frame(cpi); // transform / motion compensation build reconstruction frame - vp8_encode_frame(cpi); + cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0; @@ -4408,92 +4491,43 @@ static void encode_frame_to_data_rate else cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; - if (cm->no_lpf) + +#if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded) { - cm->filter_level = 0; + sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */ } else +#endif { - struct vpx_usec_timer timer; - - vpx_usec_timer_start(&timer); - - if (cpi->sf.auto_filter == 0) - vp8cx_pick_filter_level_fast(cpi->Source, cpi); - else - vp8cx_pick_filter_level(cpi->Source, cpi); - - vpx_usec_timer_mark(&timer); - - cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); - } - - if (cm->filter_level > 0) - { - vp8cx_set_alt_lf_level(cpi, cm->filter_level); - vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level); - cm->last_filter_type = cm->filter_type; - cm->last_sharpness_level = cm->sharpness_level; + loopfilter_frame(cpi, cm); } - /* Move storing frame_type out of the above loop since it is also - * needed in motion search besides loopfilter */ - cm->last_frame_type = cm->frame_type; - - vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); - if (cpi->oxcf.error_resilient_mode == 1) { cm->refresh_entropy_probs = 0; } +#if CONFIG_MULTITHREAD + /* wait that filter_level is picked so that we can continue with stream packing */ + if (cpi->b_multi_threaded) + sem_wait(&cpi->h_event_end_lpf); +#endif + // build the bitstream vp8_pack_bitstream(cpi, dest, size); +#if CONFIG_MULTITHREAD + /* wait for loopfilter thread done */ + if (cpi->b_multi_threaded) { - YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx]; - YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; - YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx]; - YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx]; - // At this point the new frame has been encoded coded. - // If any buffer copy / swaping is signalled it should be done here. - if (cm->frame_type == KEY_FRAME) - { - vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12); - vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12); - } - else // For non key frames - { - // Code to copy between reference buffers - if (cm->copy_buffer_to_arf) - { - if (cm->copy_buffer_to_arf == 1) - { - if (cm->refresh_last_frame) - // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. - vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12); - else - vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12); - } - else if (cm->copy_buffer_to_arf == 2) - vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12); - } - - if (cm->copy_buffer_to_gf) - { - if (cm->copy_buffer_to_gf == 1) - { - if (cm->refresh_last_frame) - // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. - vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12); - else - vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12); - } - else if (cm->copy_buffer_to_gf == 2) - vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12); - } - } + sem_wait(&cpi->h_event_end_lpf); } +#endif + + /* Move storing frame_type out of the above loop since it is also + * needed in motion search besides loopfilter */ + cm->last_frame_type = cm->frame_type; // Update rate control heuristics cpi->total_byte_count += (*size); @@ -5325,7 +5359,9 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame) + { generate_psnr_packet(cpi); + } #if CONFIG_PSNR diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index b66131d15..057186eb8 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -603,12 +603,17 @@ typedef struct int encoding_thread_count; pthread_t *h_encoding_thread; + pthread_t h_filter_thread; + MB_ROW_COMP *mb_row_ei; ENCODETHREAD_DATA *en_thread_data; + LPFTHREAD_DATA lpf_thread_data; //events sem_t *h_event_start_encoding; sem_t h_event_end_encoding; + sem_t h_event_start_lpf; + sem_t h_event_end_lpf; #endif TOKENLIST *tplist; -- 2.40.0