x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
}
+ if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
+
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
if (frame_is_intra_only(cm)) {
vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
}
- x->rdmult = orig_rdmult;
-
// TODO(jingning) The rate-distortion optimization flow needs to be
// refactored to provide proper exit/return handle.
if (rd_cost->rate == INT_MAX)
else
rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ x->rdmult = orig_rdmult;
+
ctx->rate = rd_cost->rate;
ctx->dist = rd_cost->dist;
}
PICK_MODE_CONTEXT *ctx) {
MACROBLOCK *const x = &td->mb;
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+ if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
#undef FEATURES
#undef Q_CTX
+// Derives a block-level rdmult from the TPL statistics of the current GF
+// group frame (cpi->tpl_stats[cpi->twopass.gf_group.index]).
+//
+// Sums intra_cost and mc_dep_cost over every 8x8 unit of the block at
+// (mi_row, mi_col) that lies inside the frame, forms
+//   rk   = intra_cost / (intra_cost + mc_dep_cost)
+//   beta = cpi->rd.r0 / rk
+// and passes beta to vp9_get_adaptive_rdmult(). The result is clamped to
+// [orig_rdmult * 3 / 4, orig_rdmult * 5 / 4] and is never less than 1.
+//
+// When the block has no accumulated cost, or cpi->rd.r0 is not a positive
+// number, the ratios above are undefined and orig_rdmult (at least 1) is
+// returned unchanged.
+int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
+                     int orig_rdmult) {
+  TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+  int tpl_stride = tpl_frame->stride;
+  int64_t intra_cost = 0;
+  int64_t mc_dep_cost = 0;
+  int mi_wide = num_8x8_blocks_wide_lookup[bsize];
+  int mi_high = num_8x8_blocks_high_lookup[bsize];
+  // Clip the block to the frame so no out-of-frame element is ever addressed.
+  const int row_end = VPXMIN(mi_row + mi_high, cpi->common.mi_rows);
+  const int col_end = VPXMIN(mi_col + mi_wide, cpi->common.mi_cols);
+  int row, col;
+
+  int dr = 0;
+  double r0, rk, beta;
+
+  r0 = cpi->rd.r0;
+
+  for (row = mi_row; row < row_end; ++row) {
+    for (col = mi_col; col < col_end; ++col) {
+      TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+
+      intra_cost += this_stats->intra_cost;
+      mc_dep_cost += this_stats->mc_dep_cost;
+    }
+  }
+
+  // A zero total would make rk 0/0 (NaN); a zero or NaN r0 would make beta
+  // zero or NaN. Either way the value reaching the integer conversion in
+  // vp9_get_adaptive_rdmult() would not be representable, so keep the
+  // caller's multiplier instead.
+  if (intra_cost + mc_dep_cost <= 0 || !(r0 > 0.0))
+    return VPXMAX(1, orig_rdmult);
+
+  // rk is 0 when intra_cost is 0 but mc_dep_cost is not; beta is then
+  // +infinity under IEEE arithmetic, i.e. the smallest possible rdmult.
+  rk = (double)intra_cost / (intra_cost + mc_dep_cost);
+  beta = r0 / rk;
+  dr = vp9_get_adaptive_rdmult(cpi, beta);
+
+  dr = VPXMIN(dr, orig_rdmult * 5 / 4);
+  dr = VPXMAX(dr, orig_rdmult * 3 / 4);
+
+  dr = VPXMAX(1, dr);
+  return dr;
+}
+
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
int must_split = 0;
- int partition_mul = cpi->rd.RDMULT;
+ int partition_mul = cpi->sf.enable_tpl_model ? x->cb_rdmult : cpi->rd.RDMULT;
(void)*tp_orig;
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, td->pc_root);
} else {
+ int orig_rdmult = cpi->rd.RDMULT;
+ x->cb_rdmult = orig_rdmult;
+ if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
+ int dr =
+ get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
+ x->cb_rdmult = dr;
+ }
+
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
source_var_based_partition_search_method(cpi);
+ } else if (cpi->twopass.gf_group.index) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost_base = 0;
+ int64_t mc_dep_cost_base = 0;
+ int row, col;
+
+ for (row = 0; row < cm->mi_rows; ++row) {
+ for (col = 0; col < cm->mi_cols; ++col) {
+ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+ intra_cost_base += this_stats->intra_cost;
+ mc_dep_cost_base += this_stats->mc_dep_cost;
+ }
+ }
+
+ cpi->rd.r0 = (double)intra_cost_base / (intra_cost_base + mc_dep_cost_base);
}
{
return overlap_area = width * height;
}
+// Divides a position by MI_SIZE, rounding toward negative infinity rather
+// than toward zero (assuming MI_SIZE is positive), so that negative positions
+// map to the grid cell that actually contains them.
+int round_floor(int ref_pos) {
+  if (ref_pos >= 0) return ref_pos / MI_SIZE;
+
+  // C integer division truncates toward zero; shift the negative value by
+  // one before dividing and subtract one afterwards to obtain the floor.
+  return -(1 + (-ref_pos - 1) / MI_SIZE);
+}
+
void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
int mi_row, int mi_col) {
TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
int ref_pos_col = mi_col * MI_SIZE + mv_col;
// top-left on grid block location
- int grid_pos_row_base = (ref_pos_row >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
- int grid_pos_col_base = (ref_pos_col >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
+ int grid_pos_row_base = round_floor(ref_pos_row) * MI_SIZE;
+ int grid_pos_col_base = round_floor(ref_pos_col) * MI_SIZE;
int block;
for (block = 0; block < 4; ++block) {
grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
int overlap_area = get_overlap_area(grid_pos_row, grid_pos_col,
ref_pos_row, ref_pos_col, block);
- int ref_mi_row = grid_pos_row >> MI_SIZE_LOG2;
- int ref_mi_col = grid_pos_col >> MI_SIZE_LOG2;
+ int ref_mi_row = round_floor(grid_pos_row);
+ int ref_mi_col = round_floor(grid_pos_col);
int64_t mc_flow = tpl_stats->mc_dep_cost -
(tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
return (int)rdmult;
}
+// Computes a rate-distortion multiplier from the frame's DC quantizer,
+// scaled by 1 / beta:
+//   rdmult = (88 * q * q / beta) / 24
+// where q = vp9_dc_quant(base_qindex, 0, bit_depth). With high bit depth
+// enabled the value is rounded down by 4 bits (10-bit) or 8 bits (12-bit).
+// In two-pass encoding of non-key frames it is further adjusted by
+// rd_frame_type_factor[] and rd_boost_factor[].
+//
+// beta is expected to be positive; a zero, negative or NaN beta is treated
+// as 1.0 (no scaling). The returned value is never less than 1.
+int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
+  const VP9_COMMON *cm = &cpi->common;
+  const int64_t q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+  // Dividing by a non-positive or NaN beta would yield a value that cannot
+  // be converted to an integer.
+  const double scale = (beta > 0.0) ? beta : 1.0;
+  // Convert to 64 bits, not int: for high bit depths the quotient can exceed
+  // INT_MAX before the right shift below brings it back into range.
+  int64_t rdmult = (int64_t)((88 * q * q / scale) / 24);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  switch (cm->bit_depth) {
+    case VPX_BITS_8: break;
+    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
+    default:
+      assert(cm->bit_depth == VPX_BITS_12);
+      rdmult = ROUND_POWER_OF_TWO(rdmult, 8);
+      break;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  if (cpi->oxcf.pass == 2 && (cm->frame_type != KEY_FRAME)) {
+    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
+    // The boost table index is capped at 15.
+    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+
+    // Both factors are applied as value * factor / 128.
+    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
+    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+  }
+  if (rdmult < 1) rdmult = 1;
+  return (int)rdmult;
+}
+
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
double q;
#if CONFIG_VP9_HIGHBITDEPTH