From: Jerome Jiang Date: Wed, 26 Apr 2017 00:09:29 +0000 (+0000) Subject: Merge "Fix the decoder seg fault when frame is corrupted." X-Git-Tag: v1.7.0~522 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=15ee8a8c4579b1cdb9fd4a1624b0803ab285a731;hp=25c1bada7232e3b873928dfc59fb8e7a039e62ad;p=libvpx Merge "Fix the decoder seg fault when frame is corrupted." --- diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 42dc6830d..bdd666286 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -93,11 +93,6 @@ struct macroblock { int rddiv; int rdmult; int mb_energy; - int *m_search_count_ptr; - int *ex_search_count_ptr; -#if CONFIG_MULTITHREAD - pthread_mutex_t *search_count_mutex; -#endif // These are set to their default values at the beginning, and then adjusted // further in the encoding process. @@ -173,6 +168,8 @@ struct macroblock { uint8_t skip_low_source_sad; + uint8_t lowvar_highsumdiff; + uint8_t last_sb_high_content; // For each superblock: saves the content value (e.g., low/high sad/sumdiff) diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index b92557a9c..5a5ca1f94 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -573,7 +573,8 @@ void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level) { int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, int content_state) { if ((content_state == kLowSadLowSumdiff) || - (content_state == kHighSadLowSumdiff) || noise_level == kDenHigh) + (content_state == kHighSadLowSumdiff) || + (content_state == kLowVarHighSumdiff) || noise_level == kDenHigh) return (3 * threshold) >> 1; else return (5 * threshold) >> 2; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 481f5a0fd..0a9e49ec2 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -495,11 +495,13 @@ int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, int width, if 
(width <= 640 && height <= 480) return (5 * threshold_base) >> 2; else if ((content_state == kLowSadLowSumdiff) || - (content_state == kHighSadLowSumdiff)) + (content_state == kHighSadLowSumdiff) || + (content_state == kLowVarHighSumdiff)) return (5 * threshold_base) >> 2; } else if (speed == 7) { if ((content_state == kLowSadLowSumdiff) || - (content_state == kHighSadLowSumdiff)) { + (content_state == kHighSadLowSumdiff) || + (content_state == kLowVarHighSumdiff)) { return (5 * threshold_base) >> 2; } } @@ -536,8 +538,8 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, threshold_base = (7 * threshold_base) >> 3; } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5 && - cpi->denoiser.denoising_level >= kDenLow) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) threshold_base = vp9_scale_part_thresh( threshold_base, cpi->denoiser.denoising_level, content_state); else @@ -946,9 +948,16 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, unsigned int y_sad, int is_key_frame) { int i; MACROBLOCKD *xd = &x->e_mbd; + + if (is_key_frame) return; + // For speed >= 8, avoid the chroma check if y_sad is above threshold. - if (is_key_frame || (cpi->oxcf.speed >= 8 && y_sad > cpi->vbp_thresholds[1])) - return; + if (cpi->oxcf.speed >= 8) { + if (y_sad > cpi->vbp_thresholds[1] && + (!cpi->noise_estimate.enabled || + vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) + return; + } for (i = 1; i <= 2; ++i) { unsigned int uv_sad = UINT_MAX; @@ -994,6 +1003,11 @@ static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, else x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff : kHighSadHighSumdiff; + + // Detect large lighting change. 
+ if (tmp_variance < (tmp_sse >> 3) && (tmp_sse - tmp_variance) > 10000) + x->content_state_sb = kLowVarHighSumdiff; + if (cpi->content_state_sb_fd != NULL) { if (tmp_sad < avg_source_sad_threshold2) { // Cap the increment to 255. @@ -1061,6 +1075,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, content_state == kLowSadHighSumdiff) ? 1 : 0; + x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0; if (cpi->content_state_sb_fd != NULL) x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2]; // If source_sad is low copy the partition without computing the y_sad. @@ -4110,6 +4125,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, x->color_sensitivity[1] = 0; x->sb_is_skin = 0; x->skip_low_source_sad = 0; + x->lowvar_highsumdiff = 0; x->content_state_sb = 0; if (seg->enabled) { @@ -4341,7 +4357,6 @@ void vp9_init_tile_data(VP9_COMP *cpi) { } } #if CONFIG_MULTITHREAD - tile_data->search_count_mutex = NULL; tile_data->enc_row_mt_mutex = NULL; tile_data->row_base_thresh_freq_fact = NULL; #endif @@ -4361,10 +4376,6 @@ void vp9_init_tile_data(VP9_COMP *cpi) { cpi->tplist[tile_row][tile_col] = tplist + tplist_count; tplist = cpi->tplist[tile_row][tile_col]; tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); - - // Set up pointers to per thread motion search counters. - this_tile->m_search_count = 0; // Count of motion search hits. - this_tile->ex_search_count = 0; // Exhaustive mesh search hits. } } } @@ -4409,13 +4420,6 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, const int mi_row_end = tile_info->mi_row_end; int mi_row; - // Set up pointers to per thread motion search counters. 
- td->mb.m_search_count_ptr = &this_tile->m_search_count; - td->mb.ex_search_count_ptr = &this_tile->ex_search_count; -#if CONFIG_MULTITHREAD - td->mb.search_count_mutex = this_tile->search_count_mutex; -#endif - for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index d9027e001..d82b706be 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3229,10 +3229,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, cpi->oxcf.content == VP9E_CONTENT_SCREEN)) vp9_scene_detection_onepass(cpi); - // For 1 pass SVC, since only ZEROMV is allowed for upsampled reference - // frame (i.e, svc->force_zero_mode_spatial_ref = 0), we can avoid this - // frame-level upsampling. - if (frame_is_intra_only(cm) == 0 && !is_one_pass_cbr_svc(cpi)) { + // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame + // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can + // avoid this frame-level upsampling (for non intra_only frames). 
+ if (frame_is_intra_only(cm) == 0 && + !(is_one_pass_cbr_svc(cpi) && cpi->svc.force_zero_mode_spatial_ref)) { vp9_scale_references(cpi); } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 7040ed9a9..64571be64 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -137,6 +137,7 @@ typedef enum { kLowSadHighSumdiff = 2, kHighSadLowSumdiff = 3, kHighSadHighSumdiff = 4, + kLowVarHighSumdiff = 5, } CONTENT_STATE_SB; typedef struct VP9EncoderConfig { @@ -281,15 +282,12 @@ typedef struct TileDataEnc { TileInfo tile_info; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; int mode_map[BLOCK_SIZES][MAX_MODES]; - int m_search_count; - int ex_search_count; FIRSTPASS_DATA fp_data; VP9RowMTSync row_mt_sync; // Used for adaptive_rd_thresh with row multithreading int *row_base_thresh_freq_fact; #if CONFIG_MULTITHREAD - pthread_mutex_t *search_count_mutex; pthread_mutex_t *enc_row_mt_mutex; #endif } TileDataEnc; diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index 681e960c8..51664112a 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -552,7 +552,6 @@ static int enc_row_mt_worker_hook(EncWorkerData *const thread_data, const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; int tile_row, tile_col; - TileDataEnc *this_tile; int end_of_frame; int thread_id = thread_data->thread_id; int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id]; @@ -574,13 +573,6 @@ static int enc_row_mt_worker_hook(EncWorkerData *const thread_data, tile_row = proc_job->tile_row_id; mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE; - this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; - thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count; - thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count; -#if CONFIG_MULTITHREAD - thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex; -#endif - vp9_encode_sb_row(cpi, 
thread_data->td, tile_row, tile_col, mi_row); } } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index a3939a5f8..24e23af3b 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1998,18 +1998,6 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, int range = sf->mesh_patterns[0].range; int baseline_interval_divisor; -#if CONFIG_MULTITHREAD - if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex); -#endif - - // Keep track of number of exhaustive calls (this frame in this thread). - ++(*x->ex_search_count_ptr); - -#if CONFIG_MULTITHREAD - if (NULL != x->search_count_mutex) - pthread_mutex_unlock(x->search_count_mutex); -#endif - // Trap illegal values for interval and range for this function. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || (interval > range)) @@ -2367,32 +2355,6 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, return best_sad; } -#define MIN_EX_SEARCH_LIMIT 128 -static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) { - const SPEED_FEATURES *const sf = &cpi->sf; - int is_exhaustive_allowed; - int max_ex; - -#if CONFIG_MULTITHREAD - if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex); -#endif - - max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT, - (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100); - - is_exhaustive_allowed = sf->allow_exhaustive_searches && - (sf->exhaustive_searches_thresh < INT_MAX) && - (*x->ex_search_count_ptr <= max_ex) && - !cpi->rc.is_src_frame_alt_ref; - -#if CONFIG_MULTITHREAD - if (NULL != x->search_count_mutex) - pthread_mutex_unlock(x->search_count_mutex); -#endif - - return is_exhaustive_allowed; -} - int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int search_method, int error_per_bit, int *cost_list, const MV *ref_mv, @@ -2435,21 +2397,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE 
bsize, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv, tmp_mv); -#if CONFIG_MULTITHREAD - if (NULL != x->search_count_mutex) - pthread_mutex_lock(x->search_count_mutex); -#endif - - // Keep track of number of searches (this frame in this thread). - ++(*x->m_search_count_ptr); - -#if CONFIG_MULTITHREAD - if (NULL != x->search_count_mutex) - pthread_mutex_unlock(x->search_count_mutex); -#endif - // Should we allow a follow on exhaustive search? - if (is_exhaustive_allowed(cpi, x)) { + if ((sf->exhaustive_searches_thresh < INT_MAX) && + !cpi->rc.is_src_frame_alt_ref) { int64_t exhuastive_thr = sf->exhaustive_searches_thresh; exhuastive_thr >>= 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); diff --git a/vp9/encoder/vp9_multi_thread.c b/vp9/encoder/vp9_multi_thread.c index f5d8e430c..0ad5dcc48 100644 --- a/vp9/encoder/vp9_multi_thread.c +++ b/vp9/encoder/vp9_multi_thread.c @@ -116,11 +116,6 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; - CHECK_MEM_ERROR(cm, this_tile->search_count_mutex, - vpx_malloc(sizeof(*this_tile->search_count_mutex))); - - pthread_mutex_init(this_tile->search_count_mutex, NULL); - CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex, vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex))); @@ -170,9 +165,6 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) { this_tile->row_base_thresh_freq_fact = NULL; } } - pthread_mutex_destroy(this_tile->search_count_mutex); - vpx_free(this_tile->search_count_mutex); - this_tile->search_count_mutex = NULL; pthread_mutex_destroy(this_tile->enc_row_mt_mutex); vpx_free(this_tile->enc_row_mt_mutex); this_tile->enc_row_mt_mutex = NULL; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index f177814d6..50fb8b6c0 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -170,6 +170,14 @@ static int combined_motion_search(VP9_COMP 
*cpi, MACROBLOCK *x, } vp9_set_mv_search_range(&x->mv_limits, &ref_mv); + // Limit motion vector for large lighting change. + if (cpi->oxcf.speed > 5 && x->lowvar_highsumdiff) { + x->mv_limits.col_min = VPXMAX(x->mv_limits.col_min, -10); + x->mv_limits.row_min = VPXMAX(x->mv_limits.row_min, -10); + x->mv_limits.col_max = VPXMIN(x->mv_limits.col_max, 10); + x->mv_limits.row_max = VPXMIN(x->mv_limits.row_max, 10); + } + assert(x->mv_best_ref_index[ref] <= 2); if (x->mv_best_ref_index[ref] < 2) mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; @@ -354,7 +362,8 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, *sse_y = sse; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + cpi->oxcf.speed > 5) ac_thr = vp9_scale_acskip_thresh(ac_thr, cpi->denoiser.denoising_level, (abs(sum) >> (bw + bh))); else @@ -452,28 +461,32 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, // Transform skipping test in UV planes. 
for (i = 1; i <= 2; i++) { - struct macroblock_plane *const p = &x->plane[i]; - struct macroblockd_plane *const pd = &xd->plane[i]; - const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd); - const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size]; - const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd); - const int uv_bw = b_width_log2_lookup[uv_bsize]; - const int uv_bh = b_height_log2_lookup[uv_bsize]; - const int sf = (uv_bw - b_width_log2_lookup[unit_size]) + - (uv_bh - b_height_log2_lookup[unit_size]); - const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf); - const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf); - int j = i - 1; - - vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i); - var_uv[j] = cpi->fn_ptr[uv_bsize].vf( - p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]); - - if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) && - (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j])) - skip_uv[j] = 1; - else - break; + if (cpi->oxcf.speed < 8 || x->color_sensitivity[i - 1]) { + struct macroblock_plane *const p = &x->plane[i]; + struct macroblockd_plane *const pd = &xd->plane[i]; + const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd); + const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size]; + const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd); + const int uv_bw = b_width_log2_lookup[uv_bsize]; + const int uv_bh = b_height_log2_lookup[uv_bsize]; + const int sf = (uv_bw - b_width_log2_lookup[unit_size]) + + (uv_bh - b_height_log2_lookup[unit_size]); + const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf); + const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf); + int j = i - 1; + + vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i); + var_uv[j] = cpi->fn_ptr[uv_bsize].vf( + p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]); + + if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) && + (sse_uv[j] - var_uv[j] < 
uv_dc_thr || sse_uv[j] == var_uv[j])) + skip_uv[j] = 1; + else + break; + } else { + skip_uv[i - 1] = 1; + } } // If the transform in YUV planes are skippable, the mode search checks @@ -481,7 +494,6 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, if (skip_uv[0] & skip_uv[1]) { *early_term = 1; } - return; } @@ -616,7 +628,7 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, int *skippable, int64_t *sse, BLOCK_SIZE bsize, - TX_SIZE tx_size) { + TX_SIZE tx_size, int rd_computed) { MACROBLOCKD *xd = &x->e_mbd; const struct macroblockd_plane *pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; @@ -643,8 +655,9 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, bsize < BLOCK_32X32)) { unsigned int var_y, sse_y; (void)tx_size; - model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, - &var_y, &sse_y); + if (!rd_computed) + model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, + &var_y, &sse_y); *sse = INT_MAX; *skippable = 0; return; @@ -655,8 +668,9 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, bsize < BLOCK_32X32) { unsigned int var_y, sse_y; (void)tx_size; - model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, - &var_y, &sse_y); + if (!rd_computed) + model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, + &var_y, &sse_y); *sse = INT_MAX; *skippable = 0; return; @@ -978,7 +992,7 @@ static void estimate_block_intra(int plane, int block, int row, int col, int64_t this_sse = INT64_MAX; // TODO(jingning): This needs further refactoring. 
block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx, - VPXMIN(tx_size, TX_16X16)); + VPXMIN(tx_size, TX_16X16), 0); } else { unsigned int var = 0; unsigned int sse = 0; @@ -1216,7 +1230,8 @@ static INLINE void find_predictors( static void vp9_NEWMV_diff_bias(const NOISE_ESTIMATE *ne, MACROBLOCKD *xd, PREDICTION_MODE this_mode, RD_COST *this_rdc, BLOCK_SIZE bsize, int mv_row, int mv_col, - int is_last_frame) { + int is_last_frame, int lowvar_highsumdiff, + int is_skin) { // Bias against MVs associated with NEWMV mode that are very different from // top/left neighbors. if (this_mode == NEWMV) { @@ -1263,9 +1278,12 @@ static void vp9_NEWMV_diff_bias(const NOISE_ESTIMATE *ne, MACROBLOCKD *xd, // If noise estimation is enabled, and estimated level is above threshold, // add a bias to LAST reference with small motion, for large blocks. if (ne->enabled && ne->level >= kMedium && bsize >= BLOCK_32X32 && - is_last_frame && mv_row < 8 && mv_row > -8 && mv_col < 8 && mv_col > -8) { - this_rdc->rdcost = 7 * this_rdc->rdcost >> 3; - } + is_last_frame && mv_row < 8 && mv_row > -8 && mv_col < 8 && mv_col > -8) + this_rdc->rdcost = 7 * (this_rdc->rdcost >> 3); + else if (lowvar_highsumdiff && !is_skin && bsize >= BLOCK_16X16 && + is_last_frame && mv_row < 16 && mv_row > -16 && mv_col < 16 && + mv_col > -16) + this_rdc->rdcost = 7 * (this_rdc->rdcost >> 3); } #if CONFIG_VP9_TEMPORAL_DENOISING @@ -1608,6 +1626,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int64_t this_sse; int is_skippable; int this_early_term = 0; + int rd_computed = 0; + PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode; ref_frame = ref_mode_set[idx].ref_frame; @@ -1835,12 +1855,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) { int pf_rate[3]; int64_t pf_dist[3]; + int curr_rate[3]; unsigned int pf_var[3]; unsigned int pf_sse[3]; TX_SIZE pf_tx_size[3]; int64_t 
best_cost = INT64_MAX; INTERP_FILTER best_filter = SWITCHABLE, filter; PRED_BUFFER *current_pred = this_mode_pred; + rd_computed = 1; for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) { int64_t cost; @@ -1848,6 +1870,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], &pf_var[filter], &pf_sse[filter]); + curr_rate[filter] = pf_rate[filter]; pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); pf_tx_size[filter] = mi->tx_size; @@ -1873,7 +1896,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, mi->interp_filter = best_filter; mi->tx_size = pf_tx_size[best_filter]; - this_rdc.rate = pf_rate[best_filter]; + this_rdc.rate = curr_rate[best_filter]; this_rdc.dist = pf_dist[best_filter]; var_y = pf_var[best_filter]; sse_y = pf_sse[best_filter]; @@ -1897,6 +1920,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col, &this_early_term); } else { + rd_computed = 1; model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, &var_y, &sse_y); } @@ -1905,7 +1929,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (!this_early_term) { this_sse = (int64_t)sse_y; block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize, - VPXMIN(mi->tx_size, TX_16X16)); + VPXMIN(mi->tx_size, TX_16X16), rd_computed); + x->skip_txfm[0] = is_skippable; if (is_skippable) { this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); @@ -1956,7 +1981,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, vp9_NEWMV_diff_bias(&cpi->noise_estimate, xd, this_mode, &this_rdc, bsize, frame_mv[this_mode][ref_frame].as_mv.row, frame_mv[this_mode][ref_frame].as_mv.col, - ref_frame == 
LAST_FRAME); + ref_frame == LAST_FRAME, x->lowvar_highsumdiff, + x->sb_is_skin); } // Skipping checking: test to see if this block can be reconstructed by @@ -2038,7 +2064,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (best_rdc.rdcost == INT64_MAX || ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh && - bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad)) { + bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad && + !x->lowvar_highsumdiff)) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; int i; TX_SIZE best_intra_tx_size = TX_SIZES; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 609fcdbce..fbf13969e 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -20,19 +20,14 @@ static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = { { 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 } }; -#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method +// Define 3 mesh density levels to control the number of searches. 
+#define MESH_DENSITY_LEVELS 3 static MESH_PATTERN - good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = { - { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } }, + good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = { { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } }, { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } }, { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, - { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, - { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, }; -static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = { - 50, 25, 15, 5, 1, 1 -}; // Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality @@ -163,6 +158,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed) { const int boosted = frame_is_boosted(cpi); + int i; sf->tx_size_search_breakout = 1; sf->adaptive_rd_thresh = 1; @@ -171,6 +167,19 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->use_square_partition_only = !frame_is_boosted(cpi); sf->use_square_only_threshold = BLOCK_16X16; + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { + sf->exhaustive_searches_thresh = (1 << 22); + for (i = 0; i < MAX_MESH_STEP; ++i) { + int mesh_density_level = 0; + sf->mesh_patterns[i].range = + good_quality_mesh_patterns[mesh_density_level][i].range; + sf->mesh_patterns[i].interval = + good_quality_mesh_patterns[mesh_density_level][i].interval; + } + } else { + sf->exhaustive_searches_thresh = INT_MAX; + } + if (speed >= 1) { if (cpi->oxcf.pass == 2) { TWO_PASS *const twopass = &cpi->twopass; @@ -208,6 +217,10 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->recode_tolerance_low = 15; sf->recode_tolerance_high = 30; + + sf->exhaustive_searches_thresh = + (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? 
(1 << 23) + : INT_MAX; } if (speed >= 2) { @@ -229,6 +242,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->allow_partition_search_skip = 1; sf->recode_tolerance_low = 15; sf->recode_tolerance_high = 45; + + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { + for (i = 0; i < MAX_MESH_STEP; ++i) { + int mesh_density_level = 1; + sf->mesh_patterns[i].range = + good_quality_mesh_patterns[mesh_density_level][i].range; + sf->mesh_patterns[i].interval = + good_quality_mesh_patterns[mesh_density_level][i].interval; + } + } } if (speed >= 3) { @@ -247,6 +270,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->adaptive_interp_filter_search = 1; + + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { + for (i = 0; i < MAX_MESH_STEP; ++i) { + int mesh_density_level = 2; + sf->mesh_patterns[i].range = + good_quality_mesh_patterns[mesh_density_level][i].range; + sf->mesh_patterns[i].interval = + good_quality_mesh_patterns[mesh_density_level][i].interval; + } + } } if (speed >= 4) { @@ -325,7 +358,6 @@ static void set_rt_speed_feature_framesize_independent( sf->adaptive_rd_thresh = 1; sf->adaptive_rd_thresh_row_mt = 0; sf->use_fast_coef_costing = 1; - sf->allow_exhaustive_searches = 0; sf->exhaustive_searches_thresh = INT_MAX; sf->allow_acl = 0; sf->copy_partition_flag = 0; @@ -609,7 +641,6 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { // and multiple threads match if (cpi->oxcf.row_mt_bit_exact) { sf->adaptive_rd_thresh = 0; - sf->allow_exhaustive_searches = 0; sf->adaptive_pred_interp_filter = 0; } @@ -711,6 +742,16 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->adaptive_rd_thresh = 1; sf->tx_size_search_breakout = 1; + sf->exhaustive_searches_thresh = + (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? 
(1 << 20) + : INT_MAX; + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { + for (i = 0; i < MAX_MESH_STEP; ++i) { + sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range; + sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval; + } + } + if (oxcf->mode == REALTIME) set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed, oxcf->content); @@ -720,32 +761,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { cpi->full_search_sad = vp9_full_search_sad; cpi->diamond_search_sad = vp9_diamond_search_sad; - if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { - sf->allow_exhaustive_searches = 1; - if (oxcf->mode == BEST) { - sf->exhaustive_searches_thresh = (1 << 20); - sf->max_exaustive_pct = 100; - for (i = 0; i < MAX_MESH_STEP; ++i) { - sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range; - sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval; - } - } else { - int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed; - sf->exhaustive_searches_thresh = (1 << 22); - sf->max_exaustive_pct = good_quality_max_mesh_pct[speed]; - if (speed > 0) - sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1; - - for (i = 0; i < MAX_MESH_STEP; ++i) { - sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range; - sf->mesh_patterns[i].interval = - good_quality_mesh_patterns[speed][i].interval; - } - } - } else { - sf->allow_exhaustive_searches = 0; - } - // Slow quant, dct and trellis not worthwhile for first pass // so make sure they are always turned off. 
if (oxcf->pass == 1) sf->optimize_coefficients = 0; @@ -783,7 +798,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { // and multiple threads match if (cpi->oxcf.row_mt_bit_exact) { sf->adaptive_rd_thresh = 0; - sf->allow_exhaustive_searches = 0; sf->adaptive_pred_interp_filter = 0; } diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index cbdf8bc30..f41a725e6 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -325,15 +325,9 @@ typedef struct SPEED_FEATURES { // point for this motion search and limits the search range around it. int adaptive_motion_search; - // Flag for allowing some use of exhaustive searches; - int allow_exhaustive_searches; - // Threshold for allowing exhaistive motion search. int exhaustive_searches_thresh; - // Maximum number of exhaustive searches for a frame. - int max_exaustive_pct; - // Pattern to be used for any exhaustive mesh searches. MESH_PATTERN mesh_patterns[MAX_MESH_STEP];