/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp10/common/common.h"
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/idct.h"
#include "vp10/common/mvref_common.h"
#include "vp10/common/pred_common.h"
#include "vp10/common/quant_common.h"
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
#include "vp10/common/scan.h"
#include "vp10/common/seg_common.h"

#include "vp10/encoder/cost.h"
#include "vp10/encoder/encodemb.h"
#include "vp10/encoder/encodemv.h"
#include "vp10/encoder/encoder.h"
#include "vp10/encoder/mcomp.h"
#include "vp10/encoder/palette.h"
#include "vp10/encoder/quantize.h"
#include "vp10/encoder/ratectrl.h"
#include "vp10/encoder/rd.h"
#include "vp10/encoder/rdopt.h"
#include "vp10/encoder/aq_variance.h"

#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
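
/* Usage sketch (illustration only, not from the original file): each mask is
 * indexed by MV_REFERENCE_FRAME bit position. Once the search commits to a
 * first reference, the other references' modes can be dropped with a single
 * test along the lines of
 *   if (LAST_FRAME_MODE_MASK & (1 << ref_frame)) continue;
 * SECOND_REF_FRAME_MASK plays the same role for the second reference of
 * compound modes; its 0x01 bit corresponds to NONE, so single-reference
 * candidates always pass.
 */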

#define MIN_EARLY_TERM_INDEX    3
#define NEW_MV_DISCOUNT_FACTOR  8

typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};

static INLINE int write_uniform_cost(int n, int v) {
  int l = get_unsigned_bits(n), m = (1 << l) - n;
  if (l == 0)
    return 0;
  if (v < m)
    return (l - 1) * vp10_cost_bit(128, 0);
  else
    return l * vp10_cost_bit(128, 0);
}
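
/* Worked example for write_uniform_cost() (illustration only): this is the
 * cost of a truncated binary code over n symbols. With n = 5,
 * get_unsigned_bits(5) yields l = 3 and m = (1 << 3) - 5 = 3, so symbols
 * v in [0, 2] cost 2 * vp10_cost_bit(128, 0) (two even-probability bits)
 * and v in {3, 4} cost three. For a power of two such as n = 8, l = 4 and
 * m = 8, so every v < 8 takes the short branch and costs l - 1 = 3 bits,
 * as expected.
 */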

static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int i;

  for (i = min_plane; i < max_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[i];

    p->coeff    = ctx->coeff_pbuf[i][m];
    p->qcoeff   = ctx->qcoeff_pbuf[i][m];
    pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
    p->eobs     = ctx->eobs_pbuf[i][m];

    ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
    ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
    ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
    ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];

    ctx->coeff_pbuf[i][n] = p->coeff;
    ctx->qcoeff_pbuf[i][n] = p->qcoeff;
    ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
    ctx->eobs_pbuf[i][n] = p->eobs;
  }
}

static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
          xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
          3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        if (skip_flag && !low_err_skip)
          skip_flag = 0;

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximation of the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      int64_t rate;
      const int64_t square_error = sum_sse;
      int quantizer = (pd->dequant[1] >> dequant_shift);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      vp10_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                    pd->dequant[1] >> dequant_shift,
                                    &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}
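
/* Worked example of the 1/8 normalization described at the top of
 * model_rd_for_sb() (illustration only): for 8-bit content dequant_shift
 * is 3, so an AC dequantizer of, say, 96 in the coefficient domain becomes
 * an effective pixel-domain quantizer of 96 >> 3 = 12 before entering the
 * model. For high bit depth the shift is xd->bd - 5, i.e. 3 + (bd - 8),
 * which also folds away the extra amplitude scaling of deeper content.
 * The outputs are scaled back up on the way out (dist_sum << 4) to match
 * the SSE domain used by the transform-based distortion elsewhere.
 */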

int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

#if CONFIG_VP9_HIGHBITDEPTH
int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  intptr_t block_size,
                                  int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
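
/* Illustration of the normalization above: for bd = 10, shift =
 * 2 * (10 - 8) = 4 and rounding = 8, so both sums are scaled by
 * (x + 8) >> 4. A 10-bit pixel difference is 4x its 8-bit equivalent and
 * its square 16x, so this brings the error back onto the 8-bit scale the
 * rest of the RD code expects.
 */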

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
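
/* Sanity check on band_counts[] (illustration only): ignoring the trailing
 * terminator, each row sums to the coefficient count of its transform size,
 * e.g. 1 + 2 + 3 + 4 + 3 + (16 - 13) = 16 for TX_4X4 and
 * 1 + 2 + 3 + 4 + 11 + (64 - 21) = 64 for TX_8X8. cost_coeffs() decrements
 * these counts to know when the scan crosses into the next coefficient band
 * and a new cost table applies.
 */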

static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp10_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp10_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp10_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp10_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp10_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp10_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp10_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // is eob first coefficient;
  *A = *L = (c > 0);

  return cost;
}
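
/* Note on the two costing paths above: the exact path recomputes the entropy
 * context for every token from its already-scanned neighbors via
 * get_coef_context() and keeps token_cache up to date; the
 * use_fast_coef_costing path instead reuses !prev_t (whether the previous
 * token was non-zero) as the context and skips the cache update, trading a
 * small amount of costing accuracy for speed.
 */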

static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  *out_dist = vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
#else
  *out_dist = vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  *out_sse = this_sse >> shift;
}

static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                      TX_SIZE tx_size, struct rdcost_block_args* args) {
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);

  return cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                     args->t_left + y_idx, tx_size,
                     args->so->scan, args->so->neighbors,
                     args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mbmi)) {
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp10_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp10_xform_quant(x, plane, block, plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp10_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp10_xform_quant(x, plane, block, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}
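
/* For intuition (illustration only): RDCOST(rdmult, rddiv, rate, dist) folds
 * rate and distortion onto one scale, roughly
 * ((128 + rate * rdmult) >> 8) + (dist << rddiv) in vp10/encoder/rd.h.
 * The first early-exit above therefore asks: even at zero rate, does the
 * distortion alone already push the running cost past the best seen so far?
 * If so, no coefficient coding choice can win and the per-block walk stops.
 */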

static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  TX_TYPE tx_type;
  struct rdcost_block_args args;
  vp10_zero(args);
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  tx_type = get_tx_type(pd->plane_type, xd, 0);
  args.so = get_scan(tx_size, tx_type);

  vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
                                          block_rd_txfm, &args);
  if (args.exit_early) {
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
    *skippable  = args.skippable;
  }
}

static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}

static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  s0 = vp10_cost_bit(skip_prob, 0);
  s1 = vp10_cost_bit(skip_prob, 1);

  if (cm->tx_mode == TX_MODE_SELECT) {
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
                                    tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  for (n = start_tx; n >= end_tx; n--) {
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
      if (m == n)
        r_tx_size += vp10_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp10_cost_one(tx_probs[m]);
    }
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      r[n][1] += r_tx_size;
    }
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      if (is_inter_block(mbmi)) {
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        r[n][1] -= r_tx_size;
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
      rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
      rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
    }

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
        s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mbmi->tx_size = best_tx;

  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
  *psse       = sse[mbmi->tx_size];
}

static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
                           ref_best_rd, bs);
  }
}

static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}
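
/* Example of the pruning above: D117_PRED predicts at an angle between
 * V_PRED (90 degrees) and D135_PRED (135 degrees). If neither of those
 * neighbors is the current best intra mode, the in-between angle is
 * unlikely to win, so the search for D117_PRED is skipped (return 1).
 * The other oblique modes are gated by their angular neighbors in the
 * same way.
 */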

void rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                               int palette_ctx, int dc_mode_cost,
                               PALETTE_MODE_INFO *palette_mode_info,
                               uint8_t *best_palette_color_map,
                               TX_SIZE *best_tx, PREDICTION_MODE *mode_selected,
                               int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  int colors, n;
  int src_stride = x->plane[0].src.stride;
  uint8_t *src = x->plane[0].src.buf;

#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    colors = vp10_count_colors_highbd(src, src_stride, rows, cols,
                                      cpi->common.bit_depth);
  else
#endif  // CONFIG_VP9_HIGHBITDEPTH
    colors = vp10_count_colors(src, src_stride, rows, cols);
  palette_mode_info->palette_size[0] = 0;

  if (colors > 1 && colors <= 64 && cpi->common.allow_screen_content_tools) {
    int r, c, i, j, k;
    int max_itr = 50;
    int color_ctx, color_idx = 0;
    int color_order[PALETTE_MAX_SIZE];
    double *data = x->palette_buffer->kmeans_data_buf;
    uint8_t *indices = x->palette_buffer->kmeans_indices_buf;
    uint8_t *pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
    double centroids[PALETTE_MAX_SIZE];
    uint8_t *color_map;
    double lb, ub, val;
    PALETTE_MODE_INFO *pmi = &mic->mbmi.palette_mode_info;
#if CONFIG_VP9_HIGHBITDEPTH
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    if (cpi->common.use_highbitdepth)
      lb = ub = src16[0];
    else
#endif  // CONFIG_VP9_HIGHBITDEPTH
      lb = ub = src[0];

#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src16[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    mic->mbmi.mode = DC_PRED;

    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors;
         n >= 2; --n) {
      for (i = 0; i < n; ++i)
        centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
      vp10_k_means(data, centroids, indices, pre_indices, rows * cols,
                   n, 1, max_itr);
      vp10_insertion_sort(centroids, n);
      for (i = 0; i < n; ++i)
        centroids[i] = round(centroids[i]);
      // remove duplicates
      i = 1;
      k = n;
      while (i < k) {
        if (centroids[i] == centroids[i - 1]) {
          j = i;
          while (j < k - 1) {
            centroids[j] = centroids[j + 1];
            ++j;
          }
          --k;
        } else {
          ++i;
        }
      }

#if CONFIG_VP9_HIGHBITDEPTH
      if (cpi->common.use_highbitdepth)
        for (i = 0; i < k; ++i)
          mic->mbmi.palette_mode_info.palette_colors[i] =
              clip_pixel_highbd(round(centroids[i]), cpi->common.bit_depth);
      else
#endif  // CONFIG_VP9_HIGHBITDEPTH
        for (i = 0; i < k; ++i)
          pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
      pmi->palette_size[0] = k;

      vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
      for (r = 0; r < rows; ++r)
        for (c = 0; c < cols; ++c)
          xd->plane[0].color_index_map[r * cols + c] = indices[r * cols + c];

      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize, *best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;

      this_rate = this_rate_tokenonly + dc_mode_cost +
          cpi->common.bit_depth * k * vp10_cost_bit(128, 0) +
          cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2];
      this_rate +=
          vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                        [palette_ctx], 1);
      color_map = xd->plane[0].color_index_map;
      this_rate += write_uniform_cost(k, xd->plane[0].color_index_map[0]);
      for (i = 0; i < rows; ++i) {
        for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
          color_ctx = vp10_get_palette_color_context(color_map, cols, i, j,
                                                     k, color_order);
          for (r = 0; r < k; ++r)
            if (color_map[i * cols + j] == color_order[r]) {
              color_idx = r;
              break;
            }
          assert(color_idx < k);
          this_rate +=
              cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
        }
      }
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *palette_mode_info = mic->mbmi.palette_mode_info;
        memcpy(best_palette_color_map, xd->plane[0].color_index_map,
               rows * cols * sizeof(xd->plane[0].color_index_map[0]));
        *mode_selected = DC_PRED;
        *best_tx = mic->mbmi.tx_size;
      }
    }
  }
}

static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
                                     int row, int col,
                                     PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif

  memcpy(ta, a, sizeof(ta));
  memcpy(tl, l, sizeof(tl));
  xd->mi[0]->mbmi.tx_size = TX_4X4;
  xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
          continue;
      }

      memcpy(tempa, ta, sizeof(ta));
      memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8,
                                                                   block,
                                                                   p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0]->bmi[block].as_mode = mode;
          vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
                                   dst, dst_stride,
                                   col + idx, row + idy, 0);
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                    dst, dst_stride, xd->bd);
          if (xd->lossless) {
            TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
            const scan_order *so = get_scan(TX_4X4, tx_type);
            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
                                         dst, dst_stride, p->eobs[block],
                                         xd->bd, DCT_DCT, 1);
          } else {
            int64_t unused;
            TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
            const scan_order *so = get_scan(TX_4X4, tx_type);
            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            distortion += vp10_highbd_block_error(
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                16, &unused, xd->bd) >> 2;
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
                                         dst, dst_stride, p->eobs[block],
                                         xd->bd, tx_type, 0);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        memcpy(a, tempa, sizeof(tempa));
        memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
             best_dst16 + idy * 8,
             num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    memcpy(tempa, ta, sizeof(ta));
    memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
                                 dst, dst_stride, col + idx, row + idy, 0);
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
          const scan_order *so = get_scan(TX_4X4, tx_type);
          vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
                                dst, dst_stride, p->eobs[block], DCT_DCT, 1);
        } else {
          int64_t unused;
          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
          const scan_order *so = get_scan(TX_4X4, tx_type);
          vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                         16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
                                dst, dst_stride, p->eobs[block], tx_type, 0);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, sizeof(tempa));
      memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
           num_4x4_blocks_wide * 4);

  return best_rd;
}

static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  const int *bmode_costs = cpi->mbmode_cost;

  memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, i);

        bmode_costs = cpi->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
                                      bmode_costs, t_above + idx, t_left + idy,
                                      &r, &ry, &d, bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  const int *bmode_costs;
  PALETTE_MODE_INFO palette_mode_info;
  uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
      x->palette_buffer->best_palette_color_map : NULL;
  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
  int palette_ctx = 0;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  palette_mode_info.palette_size[0] = 0;
  mic->mbmi.palette_mode_info.palette_size[0] = 0;
  if (above_mi)
    palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
  if (left_mi)
    palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                    &s, NULL, bsize, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
      this_rate +=
          vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                        [palette_ctx], 0);
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
      best_tx         = mic->mbmi.tx_size;
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }
  }

  if (cpi->common.allow_screen_content_tools)
    rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
                              &palette_mode_info, best_palette_color_map,
                              &best_tx, &mode_selected, &best_rd);

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;
  mic->mbmi.palette_mode_info.palette_size[0] =
      palette_mode_info.palette_size[0];
  if (palette_mode_info.palette_size[0] > 0) {
    memcpy(mic->mbmi.palette_mode_info.palette_colors,
           palette_mode_info.palette_colors,
           PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
    memcpy(xd->plane[0].color_index_map, best_palette_color_map,
           rows * cols * sizeof(best_palette_color_map[0]));
  }

  return best_rd;
}

// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
                            int *rate, int64_t *distortion, int *skippable,
                            int64_t *sse, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;
  int is_cost_valid = 1;

  if (ref_best_rd < 0)
    is_cost_valid = 0;

  if (is_inter_block(mbmi) && is_cost_valid) {
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp10_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_tx_size,
                     cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}

static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    xd->mi[0]->mbmi.uv_mode = mode;

    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                          &this_distortion, &s, &this_sse, bsize, best_rd))
      continue;
    this_rate = this_rate_tokenonly +
        cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
      if (!x->select_tx_size)
        swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

  xd->mi[0]->mbmi.uv_mode = mode_selected;
  return best_rd;
}

static int64_t rd_sbuv_dcpred(const VP10_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  const VP10_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &unused, bsize, INT64_MAX);
  *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

static void choose_intra_uv_mode(VP10_COMP *cpi, MACROBLOCK *const x,
                                 PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 PREDICTION_MODE *mode_uv) {
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
                   skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
  }
  *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
}

static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode,
                       int mode_context) {
  assert(is_inter_mode(mode));
  return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
}

static int set_and_cost_bmi_mvs(VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i,
                                PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  switch (mode) {
    case NEWMV:
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv,
                                       &best_ref_mv[1]->as_mv,
                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARMV:
    case NEARESTMV:
      this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
             thismvcost;
}

static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int ir, int ic,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;
  void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);

  const uint8_t *const src =
      &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                             pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0;
  TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i);
  const scan_order *so = get_scan(TX_4X4, tx_type);

  vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    fwd_txm4x4 = xd->lossless ? vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4;
  } else {
    fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
  }
#else
  fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      tran_low_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                 coeff, 8);
      vp10_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        thisdistortion += vp10_highbd_block_error(coeff,
                                                  BLOCK_OFFSET(pd->dqcoeff, k),
                                                  16, &ssz, xd->bd);
      } else {
        thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                           16, &ssz);
      }
#else
      thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
  return (mv->row >> 3) < x->mv_row_min ||
         (mv->row >> 3) > x->mv_row_max ||
         (mv->col >> 3) < x->mv_col_min ||
         (mv->col >> 3) > x->mv_col_max;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                    p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = &pd->pre[0].buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                            pd->pre[0].stride)];
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                              pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const VP10_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2]) {
  if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
      (ref_frames[1] == NONE ||
       frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
    int rfc = mode_context[ref_frames[0]];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (ref_frames[1] == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}

static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const VP10_COMMON *const cm = &cpi->common;
  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int refs[2] = {mbmi->ref_frame[0],
                       mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
  int_mv ref_mv[2];
  int ite, ref;
  const InterpKernel *kernel = vp10_filter_kernels[mbmi->interp_filter];
  struct scale_factors sf;

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  int last_besterr[2] = {INT_MAX, INT_MAX};
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
    vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
  };

  // Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
  uint8_t *second_pred;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      vp10_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                            NULL);
    }

    frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  }

  // Since we have scaled the reference frames to match the size of the current
  // frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
  vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                     cm->width, cm->height,
                                     cm->use_highbitdepth);
#else
  vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                     cm->width, cm->height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    MV tmp_mv;
    int search_range = 3;

    int tmp_col_min = x->mv_col_min;
    int tmp_col_max = x->mv_col_max;
    int tmp_row_min = x->mv_row_min;
    int tmp_row_max = x->mv_row_max;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get the prediction block from the 'other' reference frame.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      vp10_highbd_build_inter_predictor(ref_yv12[!id].buf,
                                        ref_yv12[!id].stride,
                                        second_pred, pw,
                                        &frame_mv[refs[!id]].as_mv,
                                        &sf, pw, ph, 0,
                                        kernel, MV_PRECISION_Q3,
                                        mi_col * MI_SIZE, mi_row * MI_SIZE,
                                        xd->bd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
      vp10_build_inter_predictor(ref_yv12[!id].buf,
                                 ref_yv12[!id].stride,
                                 second_pred, pw,
                                 &frame_mv[refs[!id]].as_mv,
                                 &sf, pw, ph, 0,
                                 kernel, MV_PRECISION_Q3,
                                 mi_col * MI_SIZE, mi_row * MI_SIZE);
    }
#else
    vp10_build_inter_predictor(ref_yv12[!id].buf,
                               ref_yv12[!id].stride,
                               second_pred, pw,
                               &frame_mv[refs[!id]].as_mv,
                               &sf, pw, ph, 0,
                               kernel, MV_PRECISION_Q3,
                               mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    // Do compound motion search on the current reference frame.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[id];
    vp10_set_mv_search_range(x, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    tmp_mv = frame_mv[refs[id]].as_mv;

    tmp_mv.col >>= 3;
    tmp_mv.row >>= 3;

    // Small-range full-pixel motion search.
    bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
                                        &cpi->fn_ptr[bsize],
                                        &ref_mv[id].as_mv, second_pred);
    if (bestsme < INT_MAX)
      bestsme = vp10_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                       second_pred, &cpi->fn_ptr[bsize], 1);

    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
    x->mv_row_min = tmp_row_min;
    x->mv_row_max = tmp_row_max;

    if (bestsme < INT_MAX) {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      bestsme = cpi->find_fractional_mv_step(
          x, &tmp_mv,
          &ref_mv[id].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          0, cpi->sf.mv.subpel_iters_per_step,
          NULL,
          x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred,
          pw, ph);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[0];

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_mv = tmp_mv;
      last_besterr[id] = bestsme;
    } else {
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }
}
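
/* Search schedule sketch: with four iterations and id = ite % 2, the
 * refinement alternates ref0, ref1, ref0, ref1. Each pass holds the other
 * reference's prediction fixed in second_pred, refines the current vector
 * within a +/-3 full-pel window plus a subpel step, and the loop exits as
 * soon as an iteration fails to improve last_besterr[id], bounding the cost
 * while still letting the two motion vectors adapt to each other.
 */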
1852 static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
1853 int_mv *best_ref_mv,
1854 int_mv *second_best_ref_mv,
1855 int64_t best_rd, int *returntotrate,
1857 int64_t *returndistortion,
1858 int *skippable, int64_t *psse,
1860 int_mv seg_mvs[4][MAX_REF_FRAMES],
1861 BEST_SEG_INFO *bsi_buf, int filter_idx,
1862 int mi_row, int mi_col) {
1864 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1865 MACROBLOCKD *xd = &x->e_mbd;
1866 MODE_INFO *mi = xd->mi[0];
1867 MB_MODE_INFO *mbmi = &mi->mbmi;
1869 int k, br = 0, idx, idy;
1870 int64_t bd = 0, block_sse = 0;
1871 PREDICTION_MODE this_mode;
1872 VP10_COMMON *cm = &cpi->common;
1873 struct macroblock_plane *const p = &x->plane[0];
1874 struct macroblockd_plane *const pd = &xd->plane[0];
1875 const int label_count = 4;
1876 int64_t this_segment_rd = 0;
1877 int label_mv_thresh;
1878 int segmentyrate = 0;
1879 const BLOCK_SIZE bsize = mbmi->sb_type;
1880 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1881 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1882 ENTROPY_CONTEXT t_above[2], t_left[2];
1883 int subpelmv = 1, have_ref = 0;
1884 const int has_second_rf = has_second_ref(mbmi);
1885 const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
1886 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
1890 bsi->segment_rd = best_rd;
1891 bsi->ref_mv[0] = best_ref_mv;
1892 bsi->ref_mv[1] = second_best_ref_mv;
1893 bsi->mvp.as_int = best_ref_mv->as_int;
1894 bsi->mvthresh = mvthresh;
1896 for (i = 0; i < 4; i++)
1897 bsi->modes[i] = ZEROMV;
1899 memcpy(t_above, pd->above_context, sizeof(t_above));
1900 memcpy(t_left, pd->left_context, sizeof(t_left));
1902 // 64 makes this threshold really big effectively
1903 // making it so that we very rarely check mvs on
1904 // segments. setting this to 1 would make mv thresh
1905 // roughly equal to what it is for macroblocks
1906 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1908 // Segmentation method overheads
1909 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1910 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1911 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1912 // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1913 int_mv mode_mv[MB_MODE_COUNT][2];
1914 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1915 PREDICTION_MODE mode_selected = ZEROMV;
1916 int64_t best_rd = INT64_MAX;
1917 const int i = idy * 2 + idx;
1920 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1921 const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1922 frame_mv[ZEROMV][frame].as_int = 0;
1923 vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
1924 &frame_mv[NEARESTMV][frame],
1925 &frame_mv[NEARMV][frame],
1926 mbmi_ext->mode_context);
1929 // search for the best motion vector on this segment
1930 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1931 const struct buf_2d orig_src = x->plane[0].src;
1932 struct buf_2d orig_pre[2];
1934 mode_idx = INTER_OFFSET(this_mode);
1935 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1936 if (!(inter_mode_mask & (1 << this_mode)))
1939 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
1940 this_mode, mbmi->ref_frame))
1943 memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1944 memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1945 sizeof(bsi->rdstat[i][mode_idx].ta));
1946 memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1947 sizeof(bsi->rdstat[i][mode_idx].tl));
1949 // motion search for newmv (single predictor case only)
1950 if (!has_second_rf && this_mode == NEWMV &&
1951 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1952 MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1954 int thissme, bestsme = INT_MAX;
1955 int sadpb = x->sadperbit4;
1960 /* Is the best so far sufficiently good that we can't justify
1961 * a new motion search? */
1962 if (best_rd < label_mv_thresh)
1965 if (cpi->oxcf.mode != BEST) {
1966 // use previous block's result as next block's MV predictor.
1968 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1970 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1974 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1977 VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1979 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
1980 // Take a weighted average of the step_params based on the last frame's
1981 // max mv magnitude and the best ref mvs of the current block for
1982 // the given reference.
1983 step_param = (vp10_init_search_range(max_mv) +
1984 cpi->mv_step_param) / 2;
1986 step_param = cpi->mv_step_param;
1989 mvp_full.row = bsi->mvp.as_mv.row >> 3;
1990 mvp_full.col = bsi->mvp.as_mv.col >> 3;
1992 if (cpi->sf.adaptive_motion_search) {
1993 mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
1994 mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
1995 step_param = VPXMAX(step_param, 8);
1998 // adjust src pointer for this block
2001 vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
2003 bestsme = vp10_full_pixel_search(
2004 cpi, x, bsize, &mvp_full, step_param, sadpb,
2005 cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
2006 &bsi->ref_mv[0]->as_mv, new_mv,
2009 // Should we do a full search (best quality only)
2010 if (cpi->oxcf.mode == BEST) {
2011 int_mv *const best_mv = &mi->bmi[i].as_mv[0];
2012 /* Check if mvp_full is within the range. */
2013 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
2014 x->mv_row_min, x->mv_row_max);
2015 thissme = cpi->full_search_sad(x, &mvp_full,
2016 sadpb, 16, &cpi->fn_ptr[bsize],
2017 &bsi->ref_mv[0]->as_mv,
2019 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
2020 if (thissme < bestsme) {
2022 *new_mv = best_mv->as_mv;
2024 // The full search result is actually worse, so reinstate the
2025 // previous best vector.
2026 best_mv->as_mv = *new_mv;
2030 if (bestsme < INT_MAX) {
2032 cpi->find_fractional_mv_step(
2035 &bsi->ref_mv[0]->as_mv,
2036 cm->allow_high_precision_mv,
2037 x->errorperbit, &cpi->fn_ptr[bsize],
2038 cpi->sf.mv.subpel_force_stop,
2039 cpi->sf.mv.subpel_iters_per_step,
2040 cond_cost_list(cpi, cost_list),
2041 x->nmvjointcost, x->mvcost,
2043 &x->pred_sse[mbmi->ref_frame[0]],
2046 // save motion search result for use in compound prediction
2047 seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
2050 if (cpi->sf.adaptive_motion_search)
2051 x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
2053 // restore src pointers
2054 mi_buf_restore(x, orig_src, orig_pre);
2057 if (has_second_rf) {
2058 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
2059 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
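// For compound NEWMV with the default EIGHTTAP filter, optionally refine
// the two MVs with a joint search before costing them.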
2063 if (has_second_rf && this_mode == NEWMV &&
2064 mbmi->interp_filter == EIGHTTAP) {
2065 // adjust src pointers
2067 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2069 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
2070 mi_row, mi_col, seg_mvs[i],
2072 seg_mvs[i][mbmi->ref_frame[0]].as_int =
2073 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
2074 seg_mvs[i][mbmi->ref_frame[1]].as_int =
2075 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
2077 // restore src pointers
2078 mi_buf_restore(x, orig_src, orig_pre);
2081 bsi->rdstat[i][mode_idx].brate =
2082 set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode],
2083 frame_mv, seg_mvs[i], bsi->ref_mv,
2084 x->nmvjointcost, x->mvcost);
2086 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2087 bsi->rdstat[i][mode_idx].mvs[ref].as_int =
2088 mode_mv[this_mode][ref].as_int;
2089 if (num_4x4_blocks_wide > 1)
2090 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
2091 mode_mv[this_mode][ref].as_int;
2092 if (num_4x4_blocks_high > 1)
2093 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
2094 mode_mv[this_mode][ref].as_int;
2097 // Trap vectors that reach beyond the UMV borders
2098 if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
2100 mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
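// For filter_idx > 0, try to reuse the rd stats saved for an earlier
// filter: if both MVs are full-pel and identical to that result, the
// prediction is filter-independent and the stats carry over.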
2103 if (filter_idx > 0) {
2104 BEST_SEG_INFO *ref_bsi = bsi_buf;
2108 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2109 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
2110 have_ref &= mode_mv[this_mode][ref].as_int ==
2111 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2114 if (filter_idx > 1 && !subpelmv && !have_ref) {
2115 ref_bsi = bsi_buf + 1;
2117 for (ref = 0; ref < 1 + has_second_rf; ++ref)
2118 have_ref &= mode_mv[this_mode][ref].as_int ==
2119 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2122 if (!subpelmv && have_ref &&
2123 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2124 memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
2125 sizeof(SEG_RDSTAT));
2126 if (num_4x4_blocks_wide > 1)
2127 bsi->rdstat[i + 1][mode_idx].eobs =
2128 ref_bsi->rdstat[i + 1][mode_idx].eobs;
2129 if (num_4x4_blocks_high > 1)
2130 bsi->rdstat[i + 2][mode_idx].eobs =
2131 ref_bsi->rdstat[i + 2][mode_idx].eobs;
2133 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2134 mode_selected = this_mode;
2135 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2141 bsi->rdstat[i][mode_idx].brdcost =
2142 encode_inter_mb_segment(cpi, x,
2143 bsi->segment_rd - this_segment_rd, i,
2144 &bsi->rdstat[i][mode_idx].byrate,
2145 &bsi->rdstat[i][mode_idx].bdist,
2146 &bsi->rdstat[i][mode_idx].bsse,
2147 bsi->rdstat[i][mode_idx].ta,
2148 bsi->rdstat[i][mode_idx].tl,
2151 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2152 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2153 bsi->rdstat[i][mode_idx].brate, 0);
2154 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2155 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2156 if (num_4x4_blocks_wide > 1)
2157 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2158 if (num_4x4_blocks_high > 1)
2159 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2162 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2163 mode_selected = this_mode;
2164 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2166 } /*for each 4x4 mode*/
2168 if (best_rd == INT64_MAX) {
2170 for (iy = i + 1; iy < 4; ++iy)
2171 for (midx = 0; midx < INTER_MODES; ++midx)
2172 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2173 bsi->segment_rd = INT64_MAX;
2177 mode_idx = INTER_OFFSET(mode_selected);
2178 memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2179 memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2181 set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
2182 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2185 br += bsi->rdstat[i][mode_idx].brate;
2186 bd += bsi->rdstat[i][mode_idx].bdist;
2187 block_sse += bsi->rdstat[i][mode_idx].bsse;
2188 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2189 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2191 if (this_segment_rd > bsi->segment_rd) {
2193 for (iy = i + 1; iy < 4; ++iy)
2194 for (midx = 0; midx < INTER_MODES; ++midx)
2195 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2196 bsi->segment_rd = INT64_MAX;
2200 } /* for each label */
2204 bsi->segment_yrate = segmentyrate;
2205 bsi->segment_rd = this_segment_rd;
2206 bsi->sse = block_sse;
2208 // update the coding decisions
2209 for (k = 0; k < 4; ++k)
2210 bsi->modes[k] = mi->bmi[k].as_mode;
2212 if (bsi->segment_rd > best_rd)
2214 /* set it to the best */
2215 for (i = 0; i < 4; i++) {
2216 mode_idx = INTER_OFFSET(bsi->modes[i]);
2217 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2218 if (has_second_ref(mbmi))
2219 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2220 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2221 mi->bmi[i].as_mode = bsi->modes[i];
2225 /* used to set mbmi->mv.as_int */
2227 *returntotrate = bsi->r;
2228 *returndistortion = bsi->d;
2229 *returnyrate = bsi->segment_yrate;
2230 *skippable = vp10_is_skippable_in_plane(x, BLOCK_8X8, 0);
2232 mbmi->mode = bsi->modes[3];
2234 return bsi->segment_rd;
2237 static void estimate_ref_frame_costs(const VP10_COMMON *cm,
2238 const MACROBLOCKD *xd,
2240 unsigned int *ref_costs_single,
2241 unsigned int *ref_costs_comp,
2242 vpx_prob *comp_mode_p) {
2243 int seg_ref_active = segfeature_active(&cm->seg, segment_id,
2245 if (seg_ref_active) {
2246 memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2247 memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2250 vpx_prob intra_inter_p = vp10_get_intra_inter_prob(cm, xd);
2251 vpx_prob comp_inter_p = 128;
2253 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2254 comp_inter_p = vp10_get_reference_mode_prob(cm, xd);
2255 *comp_mode_p = comp_inter_p;
2260 ref_costs_single[INTRA_FRAME] = vp10_cost_bit(intra_inter_p, 0);
2262 if (cm->reference_mode != COMPOUND_REFERENCE) {
2263 vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
2264 vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
2265 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
2267 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2268 base_cost += vp10_cost_bit(comp_inter_p, 0);
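// Single-reference signaling tree: ref_single_p1 separates LAST from
// {GOLDEN, ALTREF}, then ref_single_p2 separates GOLDEN from ALTREF.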
2270 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2271 ref_costs_single[ALTREF_FRAME] = base_cost;
2272 ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
2273 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
2274 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
2275 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
2276 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
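// These single-reference costs are never consulted in compound-only mode;
// 512 (the cost of one bit) is used as a neutral placeholder.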
2278 ref_costs_single[LAST_FRAME] = 512;
2279 ref_costs_single[GOLDEN_FRAME] = 512;
2280 ref_costs_single[ALTREF_FRAME] = 512;
2282 if (cm->reference_mode != SINGLE_REFERENCE) {
2283 vpx_prob ref_comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd);
2284 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
2286 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2287 base_cost += vp10_cost_bit(comp_inter_p, 1);
2289 ref_costs_comp[LAST_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 0);
2290 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 1);
2292 ref_costs_comp[LAST_FRAME] = 512;
2293 ref_costs_comp[GOLDEN_FRAME] = 512;
2298 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2300 int64_t comp_pred_diff[REFERENCE_MODES],
2301 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
2303 MACROBLOCKD *const xd = &x->e_mbd;
2305 // Take a snapshot of the coding context so it can be
2306 // restored if we decide to encode this way
2307 ctx->skip = x->skip;
2308 ctx->skippable = skippable;
2309 ctx->best_mode_index = mode_index;
2310 ctx->mic = *xd->mi[0];
2311 ctx->mbmi_ext = *x->mbmi_ext;
2312 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2313 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2314 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2316 memcpy(ctx->best_filter_diff, best_filter_diff,
2317 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2320 static void setup_buffer_inter(VP10_COMP *cpi, MACROBLOCK *x,
2321 MV_REFERENCE_FRAME ref_frame,
2322 BLOCK_SIZE block_size,
2323 int mi_row, int mi_col,
2324 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2325 int_mv frame_near_mv[MAX_REF_FRAMES],
2326 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2327 const VP10_COMMON *cm = &cpi->common;
2328 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2329 MACROBLOCKD *const xd = &x->e_mbd;
2330 MODE_INFO *const mi = xd->mi[0];
2331 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
2332 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2333 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2335 assert(yv12 != NULL);
2337 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2338 // use the UV scaling factors.
2339 vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2341 // Gets an initial list of candidate vectors from neighbours and orders them
2342 vp10_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
2343 NULL, NULL, mbmi_ext->mode_context);
2345 // Candidate refinement carried out at encoder and decoder
2346 vp10_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
2347 &frame_nearest_mv[ref_frame],
2348 &frame_near_mv[ref_frame]);
2350 // Further refinement that is encoder-side only, to test the top few
2351 // candidates in full and choose the best as the centre point for
2352 // subsequent searches. The current implementation doesn't support scaling.
2353 if (!vp10_is_scaled(sf) && block_size >= BLOCK_8X8)
2354 vp10_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
2355 ref_frame, block_size);
2358 static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
2360 int mi_row, int mi_col,
2361 int_mv *tmp_mv, int *rate_mv) {
2362 MACROBLOCKD *xd = &x->e_mbd;
2363 const VP10_COMMON *cm = &cpi->common;
2364 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2365 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
2366 int bestsme = INT_MAX;
2368 int sadpb = x->sadperbit16;
2370 int ref = mbmi->ref_frame[0];
2371 MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
2373 int tmp_col_min = x->mv_col_min;
2374 int tmp_col_max = x->mv_col_max;
2375 int tmp_row_min = x->mv_row_min;
2376 int tmp_row_max = x->mv_row_max;
2379 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp10_get_scaled_ref_frame(cpi,
2383 pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
2384 pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
2385 pred_mv[2] = x->pred_mv[ref];
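// Three candidate predictors: the two best reference MVs and the MV from a
// previous search on this reference; mv_best_ref_index picks among them.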
2387 if (scaled_ref_frame) {
2389 // Swap out the reference frame for a version that's been scaled to
2390 // match the resolution of the current frame, allowing the existing
2391 // motion search code to be used without additional modifications.
2392 for (i = 0; i < MAX_MB_PLANE; i++)
2393 backup_yv12[i] = xd->plane[i].pre[0];
2395 vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2398 vp10_set_mv_search_range(x, &ref_mv);
2400 // Work out the size of the first step in the mv step search.
2401 // 0 here is the maximum length first step; 1 is half the maximum, etc.
2402 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
2403 // Take a weighted average of the step_params based on the last frame's
2404 // max mv magnitude and that based on the best ref mvs of the current
2405 // block for the given reference.
2406 step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
2407 cpi->mv_step_param) / 2;
2409 step_param = cpi->mv_step_param;
2412 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
2414 2 * (b_width_log2_lookup[BLOCK_64X64] -
2415 VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
2416 step_param = VPXMAX(step_param, boffset);
2419 if (cpi->sf.adaptive_motion_search) {
2420 int bwl = b_width_log2_lookup[bsize];
2421 int bhl = b_height_log2_lookup[bsize];
2422 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
2427 // prev_mv_sad is not setup for dynamically scaled frames.
2428 if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
2430 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
2431 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2432 x->pred_mv[ref].row = 0;
2433 x->pred_mv[ref].col = 0;
2434 tmp_mv->as_int = INVALID_MV;
2436 if (scaled_ref_frame) {
2438 for (i = 0; i < MAX_MB_PLANE; ++i)
2439 xd->plane[i].pre[0] = backup_yv12[i];
2447 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
2452 bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
2453 cond_cost_list(cpi, cost_list),
2454 &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
2456 x->mv_col_min = tmp_col_min;
2457 x->mv_col_max = tmp_col_max;
2458 x->mv_row_min = tmp_row_min;
2459 x->mv_row_max = tmp_row_max;
2461 if (bestsme < INT_MAX) {
2462 int dis; /* TODO: use dis in distortion calculation later. */
2463 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
2464 cm->allow_high_precision_mv,
2466 &cpi->fn_ptr[bsize],
2467 cpi->sf.mv.subpel_force_stop,
2468 cpi->sf.mv.subpel_iters_per_step,
2469 cond_cost_list(cpi, cost_list),
2470 x->nmvjointcost, x->mvcost,
2471 &dis, &x->pred_sse[ref], NULL, 0, 0);
2473 *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
2474 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2476 if (cpi->sf.adaptive_motion_search)
2477 x->pred_mv[ref] = tmp_mv->as_mv;
2479 if (scaled_ref_frame) {
2481 for (i = 0; i < MAX_MB_PLANE; i++)
2482 xd->plane[i].pre[0] = backup_yv12[i];
2488 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2489 uint8_t *orig_dst[MAX_MB_PLANE],
2490 int orig_dst_stride[MAX_MB_PLANE]) {
2492 for (i = 0; i < MAX_MB_PLANE; i++) {
2493 xd->plane[i].dst.buf = orig_dst[i];
2494 xd->plane[i].dst.stride = orig_dst_stride[i];
2498 // In some situations we want to discount the apparent cost of a new motion
2499 // vector. Where there is a subtle motion field and especially where there is
2500 // low spatial complexity then it can be hard to cover the cost of a new motion
2501 // vector in a single block, even if that motion vector reduces distortion.
2502 // However, once established, that vector may be usable through the nearest
2503 // and near mv modes to reduce distortion in subsequent blocks and also
2504 // improve visual quality.
2505 static int discount_newmv_test(const VP10_COMP *cpi,
2508 int_mv (*mode_mv)[MAX_REF_FRAMES],
2510 return (!cpi->rc.is_src_frame_alt_ref &&
2511 (this_mode == NEWMV) &&
2512 (this_mv.as_int != 0) &&
2513 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
2514 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
2515 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
2516 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
2519 #define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3)
2520 #define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
2521 VP9_INTERP_EXTEND) << 3)
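// Note: the margins are in 1/8-pel units (hence the << 3), matching the
// eighth-pel precision in which motion vectors are stored.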
2523 // TODO(jingning): this mv clamping function should be block size dependent.
2524 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
2525 clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
2526 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
2527 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
2528 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
2531 static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
2533 int *rate2, int64_t *distortion,
2535 int *rate_y, int *rate_uv,
2537 int_mv (*mode_mv)[MAX_REF_FRAMES],
2538 int mi_row, int mi_col,
2539 int_mv single_newmv[MAX_REF_FRAMES],
2540 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
2541 int (*single_skippable)[MAX_REF_FRAMES],
2543 const int64_t ref_best_rd,
2544 int64_t *mask_filter,
2545 int64_t filter_cache[]) {
2546 VP10_COMMON *cm = &cpi->common;
2547 MACROBLOCKD *xd = &x->e_mbd;
2548 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2549 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2550 const int is_comp_pred = has_second_ref(mbmi);
2551 const int this_mode = mbmi->mode;
2552 int_mv *frame_mv = mode_mv[this_mode];
2554 int refs[2] = { mbmi->ref_frame[0],
2555 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2557 #if CONFIG_VP9_HIGHBITDEPTH
2558 DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
2561 DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
2562 #endif // CONFIG_VP9_HIGHBITDEPTH
2563 int pred_exists = 0;
2565 int64_t rd, tmp_rd, best_rd = INT64_MAX;
2566 int best_needs_copy = 0;
2567 uint8_t *orig_dst[MAX_MB_PLANE];
2568 int orig_dst_stride[MAX_MB_PLANE];
2570 INTERP_FILTER best_filter = SWITCHABLE;
2571 uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
2572 int64_t bsse[MAX_MB_PLANE << 2] = {0};
2574 int bsl = mi_width_log2_lookup[bsize];
2575 int pred_filter_search = cpi->sf.cb_pred_filter_search ?
2576 (((mi_row + mi_col) >> bsl) +
2577 get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
2579 int skip_txfm_sb = 0;
2580 int64_t skip_sse_sb = INT64_MAX;
2581 int64_t distortion_y = 0, distortion_uv = 0;
2583 #if CONFIG_VP9_HIGHBITDEPTH
2584 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2585 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
2587 tmp_buf = (uint8_t *)tmp_buf16;
2589 #endif // CONFIG_VP9_HIGHBITDEPTH
2591 if (pred_filter_search) {
2592 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
2593 if (xd->up_available)
2594 af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
2595 if (xd->left_available)
2596 lf = xd->mi[-1]->mbmi.interp_filter;
2598 if ((this_mode != NEWMV) || (af == lf))
2603 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2604 frame_mv[refs[1]].as_int == INVALID_MV)
2607 if (cpi->sf.adaptive_mode_search) {
2608 if (single_filter[this_mode][refs[0]] ==
2609 single_filter[this_mode][refs[1]])
2610 best_filter = single_filter[this_mode][refs[0]];
2614 if (this_mode == NEWMV) {
2617 // Initialize mv using single prediction mode result.
2618 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2619 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2621 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2622 joint_motion_search(cpi, x, bsize, frame_mv,
2623 mi_row, mi_col, single_newmv, &rate_mv);
2625 rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2626 &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
2627 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2628 rate_mv += vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2629 &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
2630 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2635 single_motion_search(cpi, x, bsize, mi_row, mi_col,
2637 if (tmp_mv.as_int == INVALID_MV)
2640 frame_mv[refs[0]].as_int =
2641 xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2642 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2644 // Estimate the rate implications of a new mv but discount this
2645 // under certain circumstances where we want to help initiate a weak
2646 // motion field, where the distortion gain for a single block may not
2647 // be enough to overcome the cost of a new mv.
2648 if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
2649 *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
2656 for (i = 0; i < is_comp_pred + 1; ++i) {
2657 cur_mv[i] = frame_mv[refs[i]];
2658 // Clip "next_nearest" so that it does not extend too far out of the image.
2659 if (this_mode != NEWMV)
2660 clamp_mv2(&cur_mv[i].as_mv, xd);
2662 if (mv_check_bounds(x, &cur_mv[i].as_mv))
2664 mbmi->mv[i].as_int = cur_mv[i].as_int;
2667 // Do the first prediction into the destination buffer, and the next
2668 // prediction into a temporary buffer. Then keep track of which one
2669 // of these currently holds the best predictor, and use the other
2670 // one for future predictions. In the end, copy from tmp_buf to
2671 // dst if necessary.
2672 for (i = 0; i < MAX_MB_PLANE; i++) {
2673 orig_dst[i] = xd->plane[i].dst.buf;
2674 orig_dst_stride[i] = xd->plane[i].dst.stride;
2677 // We don't include the cost of the second reference here, because there
2678 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2679 // words if you present them in that order, the second one is always known
2680 // if the first is known.
2682 // Under some circumstances we discount the cost of new mv mode to encourage
2683 // initiation of a motion field.
2684 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
2685 mode_mv, refs[0])) {
2686 *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
2687 mbmi_ext->mode_context[refs[0]]),
2688 cost_mv_ref(cpi, NEARESTMV,
2689 mbmi_ext->mode_context[refs[0]]));
2691 *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
2694 if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
2695 mbmi->mode != NEARESTMV)
2699 // Are all MVs integer pel for Y and UV
2700 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2702 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2704 // Search for best switchable filter by checking the variance of
2705 // pred error irrespective of whether the filter will be used
2706 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2707 filter_cache[i] = INT64_MAX;
2709 if (cm->interp_filter != BILINEAR) {
2710 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2711 best_filter = EIGHTTAP;
2712 } else if (best_filter == SWITCHABLE) {
2714 int tmp_rate_sum = 0;
2715 int64_t tmp_dist_sum = 0;
2717 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2720 int tmp_skip_sb = 0;
2721 int64_t tmp_skip_sse = INT64_MAX;
2723 mbmi->interp_filter = i;
2724 rs = vp10_get_switchable_rate(cpi, xd);
2725 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
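// With integer-pel MVs the prediction is a plain copy for every
// interpolation filter, so the rate/distortion modeled for the first
// filter (tmp_rate_sum/tmp_dist_sum) can be reused for the rest.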
2727 if (i > 0 && intpel_mv) {
2728 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2729 filter_cache[i] = rd;
2730 filter_cache[SWITCHABLE_FILTERS] =
2731 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2732 if (cm->interp_filter == SWITCHABLE)
2734 *mask_filter = VPXMAX(*mask_filter, rd);
2737 int64_t dist_sum = 0;
2738 if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
2739 (cpi->sf.interp_filter_search_mask & (1 << i))) {
2741 dist_sum = INT64_MAX;
2745 if ((cm->interp_filter == SWITCHABLE &&
2746 (!i || best_needs_copy)) ||
2747 (cm->interp_filter != SWITCHABLE &&
2748 (cm->interp_filter == mbmi->interp_filter ||
2749 (i == 0 && intpel_mv)))) {
2750 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2752 for (j = 0; j < MAX_MB_PLANE; j++) {
2753 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2754 xd->plane[j].dst.stride = 64;
2757 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2758 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
2759 &tmp_skip_sb, &tmp_skip_sse);
2761 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2762 filter_cache[i] = rd;
2763 filter_cache[SWITCHABLE_FILTERS] =
2764 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2765 if (cm->interp_filter == SWITCHABLE)
2767 *mask_filter = VPXMAX(*mask_filter, rd);
2769 if (i == 0 && intpel_mv) {
2770 tmp_rate_sum = rate_sum;
2771 tmp_dist_sum = dist_sum;
2775 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2776 if (rd / 2 > ref_best_rd) {
2777 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2781 newbest = i == 0 || rd < best_rd;
2785 best_filter = mbmi->interp_filter;
2786 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2787 best_needs_copy = !best_needs_copy;
2790 if ((cm->interp_filter == SWITCHABLE && newbest) ||
2791 (cm->interp_filter != SWITCHABLE &&
2792 cm->interp_filter == mbmi->interp_filter)) {
2796 skip_txfm_sb = tmp_skip_sb;
2797 skip_sse_sb = tmp_skip_sse;
2798 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2799 memcpy(bsse, x->bsse, sizeof(bsse));
2802 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2805 // Set the appropriate filter
2806 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2807 cm->interp_filter : best_filter;
2808 rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
2811 if (best_needs_copy) {
2812 // again temporarily set the buffers to local memory to prevent a memcpy
2813 for (i = 0; i < MAX_MB_PLANE; i++) {
2814 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2815 xd->plane[i].dst.stride = 64;
2818 rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
2822 // Handles the special case when a filter that is not in the
2823 // switchable list (e.g. bilinear) is indicated at the frame level, or
2824 // the skip condition holds.
2825 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2826 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
2827 &skip_txfm_sb, &skip_sse_sb);
2828 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2829 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2830 memcpy(bsse, x->bsse, sizeof(bsse));
2834 single_filter[this_mode][refs[0]] = mbmi->interp_filter;
2836 if (cpi->sf.adaptive_mode_search)
2838 if (single_skippable[this_mode][refs[0]] &&
2839 single_skippable[this_mode][refs[1]])
2840 memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
2842 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2843 // If the current modeled rd (based on pred_error) is substantially worse
2844 // than the best so far, do not bother doing a full rd search.
2845 if (rd / 2 > ref_best_rd) {
2846 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2851 if (cm->interp_filter == SWITCHABLE)
2854 memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
2855 memcpy(x->bsse, bsse, sizeof(bsse));
2857 if (!skip_txfm_sb) {
2858 int skippable_y, skippable_uv;
2859 int64_t sseuv = INT64_MAX;
2860 int64_t rdcosty = INT64_MAX;
2862 // Y cost and distortion
2863 vp10_subtract_plane(x, bsize, 0);
2864 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
2865 bsize, ref_best_rd);
2867 if (*rate_y == INT_MAX) {
2869 *distortion = INT64_MAX;
2870 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2875 *distortion += distortion_y;
2877 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2878 rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2880 if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
2881 &sseuv, bsize, ref_best_rd - rdcosty)) {
2883 *distortion = INT64_MAX;
2884 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2890 *distortion += distortion_uv;
2891 *skippable = skippable_y && skippable_uv;
2896 // The cost of skip bit needs to be added.
2897 *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
2899 *distortion = skip_sse_sb;
2903 single_skippable[this_mode][refs[0]] = *skippable;
2905 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2906 return 0; // The rate-distortion cost will be re-calculated by caller.
2909 void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x,
2910 RD_COST *rd_cost, BLOCK_SIZE bsize,
2911 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2912 VP10_COMMON *const cm = &cpi->common;
2913 MACROBLOCKD *const xd = &x->e_mbd;
2914 struct macroblockd_plane *const pd = xd->plane;
2915 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2916 int y_skip = 0, uv_skip = 0;
2917 int64_t dist_y = 0, dist_uv = 0;
2918 TX_SIZE max_uv_tx_size;
2920 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
2921 xd->mi[0]->mbmi.ref_frame[1] = NONE;
2923 if (bsize >= BLOCK_8X8) {
2924 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2925 &dist_y, &y_skip, bsize,
2926 best_rd) >= best_rd) {
2927 rd_cost->rate = INT_MAX;
2932 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2933 &dist_y, best_rd) >= best_rd) {
2934 rd_cost->rate = INT_MAX;
2938 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
2939 pd[1].subsampling_x,
2940 pd[1].subsampling_y);
2941 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2942 &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
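// If both planes are fully skippable, signal skip and charge only the
// mode-signaling rate (total minus token-only rates); otherwise add the
// cost of the no-skip flag.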
2945 if (y_skip && uv_skip) {
2946 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2947 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
2948 rd_cost->dist = dist_y + dist_uv;
2950 rd_cost->rate = rate_y + rate_uv +
2951 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
2952 rd_cost->dist = dist_y + dist_uv;
2955 ctx->mic = *xd->mi[0];
2956 ctx->mbmi_ext = *x->mbmi_ext;
2957 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
2960 // This function is designed to apply a bias or adjustment to an rd value based
2961 // on the relative variance of the source and reconstruction.
2962 #define LOW_VAR_THRESH 16
2963 #define VLOW_ADJ_MAX 25
2964 #define VHIGH_ADJ_MAX 8
2965 static void rd_variance_adjustment(VP10_COMP *cpi,
2969 MV_REFERENCE_FRAME ref_frame,
2970 unsigned int source_variance) {
2971 MACROBLOCKD *const xd = &x->e_mbd;
2972 unsigned int recon_variance;
2973 unsigned int absvar_diff = 0;
2974 int64_t var_error = 0;
2975 int64_t var_factor = 0;
2977 if (*this_rd == INT64_MAX)
2980 #if CONFIG_VP9_HIGHBITDEPTH
2981 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2983 vp10_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
2986 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
2990 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
2991 #endif // CONFIG_VP9_HIGHBITDEPTH
2993 if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
2994 absvar_diff = (source_variance > recon_variance)
2995 ? (source_variance - recon_variance)
2996 : (recon_variance - source_variance);
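// var_error measures the mismatch: 200*s*r/(s^2+r^2) is 100 when the source
// and recon variances are equal, so var_error is 0 for a perfect match and
// approaches 100 as the two variances diverge.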
2998 var_error = (200 * source_variance * recon_variance) /
2999 ((source_variance * source_variance) +
3000 (recon_variance * recon_variance));
3001 var_error = 100 - var_error;
3004 // Source variance above a threshold and ref frame is intra.
3005 // This case is targeted mainly at discouraging intra modes that give rise
3006 // to a predictor with a low spatial complexity compared to the source.
3007 if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
3008 (source_variance > recon_variance)) {
3009 var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
3010 // A second possible case of interest is where the source variance
3011 // is very low and we wish to discourage false texture or motion trails.
3012 } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
3013 (recon_variance > source_variance)) {
3014 var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
3016 *this_rd += (*this_rd * var_factor) / 100;
3020 // Do we have an internal image edge (e.g. formatting bars).
3021 int vp10_internal_image_edge(VP10_COMP *cpi) {
3022 return (cpi->oxcf.pass == 2) &&
3023 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
3024 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
3027 // Checks to see if a super block is on a horizontal image edge.
3028 // In most cases this is the "real" edge unless there are formatting
3029 // bars embedded in the stream.
3030 int vp10_active_h_edge(VP10_COMP *cpi, int mi_row, int mi_step) {
3032 int bottom_edge = cpi->common.mi_rows;
3033 int is_active_h_edge = 0;
3035 // For two-pass encoding, account for any formatting bars detected.
3036 if (cpi->oxcf.pass == 2) {
3037 TWO_PASS *twopass = &cpi->twopass;
3039 // The inactive region is specified in MBs not mi units.
3040 // The image edge is in the following MB row.
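// An MB spans 16 pixels, i.e. two 8-pixel mi units, hence the factor of 2.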
3041 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
3043 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
3044 bottom_edge = VPXMAX(top_edge, bottom_edge);
3047 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
3048 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
3049 is_active_h_edge = 1;
3051 return is_active_h_edge;
3054 // Checks to see if a super block is on a vertical image edge.
3055 // In most cases this is the "real" edge unless there are formatting
3056 // bars embedded in the stream.
3057 int vp10_active_v_edge(VP10_COMP *cpi, int mi_col, int mi_step) {
3059 int right_edge = cpi->common.mi_cols;
3060 int is_active_v_edge = 0;
3062 // For two-pass encoding, account for any formatting bars detected.
3063 if (cpi->oxcf.pass == 2) {
3064 TWO_PASS *twopass = &cpi->twopass;
3066 // The inactive region is specified in MBs not mi units.
3067 // The image edge is in the following MB column.
3068 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
3070 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
3071 right_edge = VPXMAX(left_edge, right_edge);
3074 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
3075 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
3076 is_active_v_edge = 1;
3078 return is_active_v_edge;
3081 // Checks to see if a super block is at the edge of the active image.
3082 // In most cases this is the "real" edge unless there are formatting
3083 // bars embedded in the stream.
3084 int vp10_active_edge_sb(VP10_COMP *cpi,
3085 int mi_row, int mi_col) {
3086 return vp10_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
3087 vp10_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
3090 void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
3091 TileDataEnc *tile_data,
3093 int mi_row, int mi_col,
3094 RD_COST *rd_cost, BLOCK_SIZE bsize,
3095 PICK_MODE_CONTEXT *ctx,
3096 int64_t best_rd_so_far) {
3097 VP10_COMMON *const cm = &cpi->common;
3098 RD_OPT *const rd_opt = &cpi->rd;
3099 SPEED_FEATURES *const sf = &cpi->sf;
3100 MACROBLOCKD *const xd = &x->e_mbd;
3101 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3102 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3103 const struct segmentation *const seg = &cm->seg;
3104 PREDICTION_MODE this_mode;
3105 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3106 unsigned char segment_id = mbmi->segment_id;
3107 int comp_pred, i, k;
3108 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3109 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3110 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3111 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
3112 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
3113 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3115 int64_t best_rd = best_rd_so_far;
3116 int64_t best_pred_diff[REFERENCE_MODES];
3117 int64_t best_pred_rd[REFERENCE_MODES];
3118 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3119 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3120 MB_MODE_INFO best_mbmode;
3121 int best_mode_skippable = 0;
3122 int midx, best_mode_index = -1;
3123 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3124 vpx_prob comp_mode_p;
3125 int64_t best_intra_rd = INT64_MAX;
3126 unsigned int best_pred_sse = UINT_MAX;
3127 PREDICTION_MODE best_intra_mode = DC_PRED;
3128 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3129 int64_t dist_uv[TX_SIZES];
3130 int skip_uv[TX_SIZES];
3131 PREDICTION_MODE mode_uv[TX_SIZES];
3132 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
3133 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3135 uint8_t ref_frame_skip_mask[2] = { 0 };
3136 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
3137 int mode_skip_start = sf->mode_skip_start + 1;
3138 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3139 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
3140 int64_t mode_threshold[MAX_MODES];
3141 int *mode_map = tile_data->mode_map[bsize];
3142 const int mode_search_skip_flags = sf->mode_search_skip_flags;
3143 int64_t mask_filter = 0;
3144 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
3146 vp10_zero(best_mbmode);
3148 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3149 filter_cache[i] = INT64_MAX;
3151 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3154 for (i = 0; i < REFERENCE_MODES; ++i)
3155 best_pred_rd[i] = INT64_MAX;
3156 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3157 best_filter_rd[i] = INT64_MAX;
3158 for (i = 0; i < TX_SIZES; i++)
3159 rate_uv_intra[i] = INT_MAX;
3160 for (i = 0; i < MAX_REF_FRAMES; ++i)
3161 x->pred_sse[i] = INT_MAX;
3162 for (i = 0; i < MB_MODE_COUNT; ++i) {
3163 for (k = 0; k < MAX_REF_FRAMES; ++k) {
3164 single_inter_filter[i][k] = SWITCHABLE;
3165 single_skippable[i][k] = 0;
3169 rd_cost->rate = INT_MAX;
3171 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3172 x->pred_mv_sad[ref_frame] = INT_MAX;
3173 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3174 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
3175 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
3176 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3178 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3179 frame_mv[ZEROMV][ref_frame].as_int = 0;
3182 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3183 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3184 // Skip checking missing references in both single and compound reference
3185 // modes. Note that a mode will be skipped iff both reference frames
3186 // are masked out.
3187 ref_frame_skip_mask[0] |= (1 << ref_frame);
3188 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3190 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3191 // Skip fixed mv modes for poor references
3192 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3193 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3198 // If the segment reference frame feature is enabled,
3199 // then do nothing if the current ref frame is not allowed.
3200 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3201 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3202 ref_frame_skip_mask[0] |= (1 << ref_frame);
3203 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3207 // Disable this drop out case if the ref frame
3208 // segment level feature is enabled for this segment. This is to
3209 // prevent the possibility that we end up unable to pick any mode.
3210 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3211 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3212 // unless ARNR filtering is enabled in which case we want
3213 // an unfiltered alternative. We allow near/nearest as well
3214 // because they may result in zero-zero MVs but be cheaper.
3215 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3216 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
3217 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
3218 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3219 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3220 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
3221 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3222 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
3226 if (cpi->rc.is_src_frame_alt_ref) {
3227 if (sf->alt_ref_search_fp) {
3228 mode_skip_mask[ALTREF_FRAME] = 0;
3229 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
3230 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
3234 if (sf->alt_ref_search_fp)
3235 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
3236 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
3237 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
3239 if (sf->adaptive_mode_search) {
3240 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
3241 cpi->rc.frames_since_golden >= 3)
3242 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
3243 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
3246 if (bsize > sf->max_intra_bsize) {
3247 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
3248 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
3251 mode_skip_mask[INTRA_FRAME] |=
3252 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
3254 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i)
3255 mode_threshold[i] = 0;
3256 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
3257 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
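// rd_threshes holds per-mode rd thresholds; rd_thresh_freq_fact adaptively
// scales them by how often each mode has been picked (a factor of 32 is
// neutral, hence the >> 5).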
3259 midx = sf->schedule_mode_search ? mode_skip_start : 0;
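// When scheduled mode search is enabled, bubble-sort mode_map so that modes
// with smaller thresholds (more likely winners) are visited first.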
3261 uint8_t end_pos = 0;
3262 for (i = 5; i < midx; ++i) {
3263 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
3264 uint8_t tmp = mode_map[i];
3265 mode_map[i] = mode_map[i - 1];
3266 mode_map[i - 1] = tmp;
3273 mbmi->palette_mode_info.palette_size[0] = 0;
3274 mbmi->palette_mode_info.palette_size[1] = 0;
3275 for (midx = 0; midx < MAX_MODES; ++midx) {
3276 int mode_index = mode_map[midx];
3277 int mode_excluded = 0;
3278 int64_t this_rd = INT64_MAX;
3279 int disable_skip = 0;
3280 int compmode_cost = 0;
3281 int rate2 = 0, rate_y = 0, rate_uv = 0;
3282 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3285 int64_t total_sse = INT64_MAX;
3288 this_mode = vp10_mode_order[mode_index].mode;
3289 ref_frame = vp10_mode_order[mode_index].ref_frame[0];
3290 second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
3292 // Look at the reference frame of the best mode so far and set the
3293 // skip mask to look at a subset of the remaining modes.
3294 if (midx == mode_skip_start && best_mode_index >= 0) {
3295 switch (best_mbmode.ref_frame[0]) {
3299 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
3300 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3303 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
3304 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3307 ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
3310 case MAX_REF_FRAMES:
3311 assert(0 && "Invalid Reference frame");
3316 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
3317 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
3320 if (mode_skip_mask[ref_frame] & (1 << this_mode))
3323 // Test best rd so far against threshold for trying this mode.
3324 if (best_mode_skippable && sf->schedule_mode_search)
3325 mode_threshold[mode_index] <<= 1;
3327 if (best_rd < mode_threshold[mode_index])
3330 comp_pred = second_ref_frame > INTRA_FRAME;
3332 if (!cpi->allow_comp_inter_inter)
3335 // Skip compound inter modes if ARF is not available.
3336 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3339 // Do not allow compound prediction if the segment level reference frame
3340 // feature is in use as in this case there can only be one reference.
3341 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3344 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3345 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
3348 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3350 if (ref_frame != INTRA_FRAME)
3351 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3354 if (ref_frame == INTRA_FRAME) {
3355 if (sf->adaptive_mode_search)
3356 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
3359 if (this_mode != DC_PRED) {
3360 // Disable intra modes other than DC_PRED for blocks with low variance
3361 // Threshold for intra skipping based on source variance
3362 // TODO(debargha): Specialize the threshold for super block sizes
3363 const unsigned int skip_intra_var_thresh = 64;
3364 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3365 x->source_variance < skip_intra_var_thresh)
3367 // Only search the oblique modes if the best so far is
3368 // one of the neighboring directional modes
3369 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3370 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3371 if (best_mode_index >= 0 &&
3372 best_mbmode.ref_frame[0] > INTRA_FRAME)
3375 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3376 if (conditional_skipintra(this_mode, best_intra_mode))
3381 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
3382 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
3383 this_mode, ref_frames))
3387 mbmi->mode = this_mode;
3388 mbmi->uv_mode = DC_PRED;
3389 mbmi->ref_frame[0] = ref_frame;
3390 mbmi->ref_frame[1] = second_ref_frame;
3391 // Evaluate all sub-pel filters irrespective of whether we can use
3392 // them for this frame.
3393 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3394 : cm->interp_filter;
3395 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
3398 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3400 // Select prediction reference frames.
3401 for (i = 0; i < MAX_MB_PLANE; i++) {
3402 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3404 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3407 if (ref_frame == INTRA_FRAME) {
3409 struct macroblockd_plane *const pd = &xd->plane[1];
3410 memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
3411 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
3412 NULL, bsize, best_rd);
3413 if (rate_y == INT_MAX)
3416 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
3418 if (rate_uv_intra[uv_tx] == INT_MAX) {
3419 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
3420 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3421 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3424 rate_uv = rate_uv_tokenonly[uv_tx];
3425 distortion_uv = dist_uv[uv_tx];
3426 skippable = skippable && skip_uv[uv_tx];
3427 mbmi->uv_mode = mode_uv[uv_tx];
3429 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3430 if (this_mode != DC_PRED && this_mode != TM_PRED)
3431 rate2 += intra_cost_penalty;
3432 distortion2 = distortion_y + distortion_uv;
3434 this_rd = handle_inter_mode(cpi, x, bsize,
3435 &rate2, &distortion2, &skippable,
3437 &disable_skip, frame_mv,
3439 single_newmv, single_inter_filter,
3440 single_skippable, &total_sse, best_rd,
3441 &mask_filter, filter_cache);
3442 if (this_rd == INT64_MAX)
3445 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
3447 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3448 rate2 += compmode_cost;
3451 // Estimate the reference frame signaling cost and add it
3452 // to the rolling cost variable.
3454 rate2 += ref_costs_comp[ref_frame];
3456 rate2 += ref_costs_single[ref_frame];
3459 if (!disable_skip) {
3461 // Back out the coefficient coding costs
3462 rate2 -= (rate_y + rate_uv);
3464 // Cost the skip mb case
3465 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
3466 } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
3467 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3468 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3469 // Add in the cost of the no skip flag.
3470 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
3472 // FIXME(rbultje) make this work for splitmv also
3473 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
3474 distortion2 = total_sse;
3475 assert(total_sse >= 0);
3476 rate2 -= (rate_y + rate_uv);
3480 // Add in the cost of the no skip flag.
3481 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
3484 // Calculate the final RD estimate for this mode.
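// RDCOST() forms the usual Lagrangian trade-off, lambda * rate + distortion.
// A minimal sketch of what the macro in vp10/encoder/rd.h computes (the
// exact rounding and shifts live there):
//   cost = ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);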
3485 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3488 // Apply an adjustment to the rd value based on the similarity of the
3489 // source variance and reconstructed variance.
3490 rd_variance_adjustment(cpi, x, bsize, &this_rd,
3491 ref_frame, x->source_variance);
3493 if (ref_frame == INTRA_FRAME) {
3494 // Keep record of best intra rd
3495 if (this_rd < best_intra_rd) {
3496 best_intra_rd = this_rd;
3497 best_intra_mode = mbmi->mode;
3501 if (!disable_skip && ref_frame == INTRA_FRAME) {
3502 for (i = 0; i < REFERENCE_MODES; ++i)
3503 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
3504 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3505 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
3508 // Did this mode help, i.e. is it the new best mode so far?
3509 if (this_rd < best_rd || x->skip) {
3510 int max_plane = MAX_MB_PLANE;
3511 if (!mode_excluded) {
3512 // Note index of best mode so far
3513 best_mode_index = mode_index;
3515 if (ref_frame == INTRA_FRAME) {
3516 /* required for left and above block mv */
3517 mbmi->mv[0].as_int = 0;
3520 best_pred_sse = x->pred_sse[ref_frame];
3523 rd_cost->rate = rate2;
3524 rd_cost->dist = distortion2;
3525 rd_cost->rdcost = this_rd;
3527 best_mbmode = *mbmi;
3528 best_skip2 = this_skip2;
3529 best_mode_skippable = skippable;
3531 if (!x->select_tx_size)
3532 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3533 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3534 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
3536 // TODO(debargha): enhance this test with a better distortion prediction
3537 // based on qp, activity mask and history
3538 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3539 (mode_index > MIN_EARLY_TERM_INDEX)) {
3540 int qstep = xd->plane[0].dequant[1];
3541 // TODO(debargha): Enhance this by specializing for each mode_index
3543 #if CONFIG_VP9_HIGHBITDEPTH
3544 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3545 qstep >>= (xd->bd - 8);
3547 #endif // CONFIG_VP9_HIGHBITDEPTH
3548 if (x->source_variance < UINT_MAX) {
3549 const int var_adjust = (x->source_variance < 16);
3550 scale -= var_adjust;
3552 if (ref_frame > INTRA_FRAME &&
3553 distortion2 * scale < qstep * qstep) {
3560 /* keep record of best compound/single-only prediction */
3561 if (!disable_skip && ref_frame != INTRA_FRAME) {
3562 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3564 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3565 single_rate = rate2 - compmode_cost;
3566 hybrid_rate = rate2;
3568 single_rate = rate2;
3569 hybrid_rate = rate2 + compmode_cost;
3572 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3573 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3576 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
3577 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3579 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
3580 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3582 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3583 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3585 /* keep record of best filter type */
3586 if (!mode_excluded && cm->interp_filter != BILINEAR) {
3587 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
3588 SWITCHABLE_FILTERS : cm->interp_filter];
3590 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3592 if (ref == INT64_MAX)
3594 else if (filter_cache[i] == INT64_MAX)
3595 // When early termination is triggered, the encoder does not have
3596 // access to the rate-distortion cost. It only knows that the cost
3597 // should be above the maximum valid value, so it takes the known
3598 // maximum plus an arbitrary constant as the rate-distortion cost.
3599 adj_rd = mask_filter - ref + 10;
3601 adj_rd = filter_cache[i] - ref;
3604 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
3612 if (x->skip && !comp_pred)
3616 // The inter modes' rate costs are not calculated precisely in some cases.
3617 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
3618 // ZEROMV. Here, checks are added for those cases, and the mode decisions
3619 // are corrected.
3620 if (best_mbmode.mode == NEWMV) {
3621 const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
3622 best_mbmode.ref_frame[1]};
3623 int comp_pred_mode = refs[1] > INTRA_FRAME;
3625 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3626 ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
3627 best_mbmode.mv[1].as_int) || !comp_pred_mode))
3628 best_mbmode.mode = NEARESTMV;
3629 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3630 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
3631 best_mbmode.mv[1].as_int) || !comp_pred_mode))
3632 best_mbmode.mode = NEARMV;
3633 else if (best_mbmode.mv[0].as_int == 0 &&
3634 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
3635 best_mbmode.mode = ZEROMV;
3638 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
3639 rd_cost->rate = INT_MAX;
3640 rd_cost->rdcost = INT64_MAX;
3644 // If we used an estimate for the uv intra rd in the loop above...
3645 if (sf->use_uv_intra_rd_estimate) {
3646 // Do Intra UV best rd mode selection if best mode choice above was intra.
3647 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3649 *mbmi = best_mbmode;
3650 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
3651 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3652 &rate_uv_tokenonly[uv_tx_size],
3653 &dist_uv[uv_tx_size],
3654 &skip_uv[uv_tx_size],
3655 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3660 assert((cm->interp_filter == SWITCHABLE) ||
3661 (cm->interp_filter == best_mbmode.interp_filter) ||
3662 !is_inter_block(&best_mbmode));
3664 if (!cpi->rc.is_src_frame_alt_ref)
3665 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
3666 sf->adaptive_rd_thresh, bsize, best_mode_index);
3669 *mbmi = best_mbmode;
3670 x->skip |= best_skip2;
3672 for (i = 0; i < REFERENCE_MODES; ++i) {
3673 if (best_pred_rd[i] == INT64_MAX)
3674 best_pred_diff[i] = INT_MIN;
3676 best_pred_diff[i] = best_rd - best_pred_rd[i];
3680 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3681 if (best_filter_rd[i] == INT64_MAX)
3682 best_filter_diff[i] = 0;
3684 best_filter_diff[i] = best_rd - best_filter_rd[i];
3686 if (cm->interp_filter == SWITCHABLE)
3687 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3689 vp10_zero(best_filter_diff);
3692 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
3693 // updating code causes PSNR loss. Need to figure out the conflict.
3694 x->skip |= best_mode_skippable;
3696 if (!x->skip && !x->select_tx_size) {
3697 int has_high_freq_coeff = 0;
3699 int max_plane = is_inter_block(&xd->mi[0]->mbmi)
3701 for (plane = 0; plane < max_plane; ++plane) {
3702 x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
3703 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
3706 for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
3707 x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
3708 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
3711 best_mode_skippable |= !has_high_freq_coeff;
3714 assert(best_mode_index >= 0);
3716 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
3717 best_filter_diff, best_mode_skippable);
3720 void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
3721 TileDataEnc *tile_data,
3725 PICK_MODE_CONTEXT *ctx,
3726 int64_t best_rd_so_far) {
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  INTERP_FILTER best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;
  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < MAX_REF_FRAMES; ++i)
    x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
    x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->mode = ZEROMV;
  mbmi->uv_mode = DC_PRED;
  mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE;
  mbmi->mv[0].as_int = 0;
  x->skip = 1;
  if (cm->interp_filter != BILINEAR) {
    best_filter = EIGHTTAP;
    if (cm->interp_filter == SWITCHABLE &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filter = i;
        rs = vp10_get_switchable_rate(cpi, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  if (cm->interp_filter == SWITCHABLE) {
    mbmi->interp_filter = best_filter;
    rate2 += vp10_get_switchable_rate(cpi, xd);
  } else {
    mbmi->interp_filter = cm->interp_filter;
  }
  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += vp10_cost_bit(comp_mode_p, comp_pred);

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
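  /* For reference, RDCOST() folds the bit cost and the distortion into one
   * comparable int64_t. A sketch, assuming the usual libvpx definition in
   * rd.h (the exact rounding/shift constants live there, not here):
   *
   *   RDCOST(RM, DM, R, D) ~ ROUND_POWER_OF_TWO((int64_t)R * RM, 8) + (D << DM)
   *
   * where RM (x->rdmult) is the Lagrangian lambda in the rate domain and DM
   * (x->rddiv) rescales the distortion; lower is better. With distortion2
   * fixed at 0 here, this_rd reduces to the weighted signaling rate alone.
   */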
  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == mbmi->interp_filter));

  vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                             cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);

  vp10_zero(best_pred_diff);
  vp10_zero(best_filter_diff);

  if (!x->select_tx_size)
    swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
  store_coding_context(x, ctx, THR_ZEROMV,
                       best_pred_diff, best_filter_diff, 0);
}
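// RD mode search for sub-8x8 partitions. Unlike the full-size search, each
// 4x4/4x8/8x4 sub-block carries its own b_mode_info (mode plus motion
// vector), so the outer loop below walks reference-frame combinations
// (vp10_ref_order) and delegates the per-sub-block mode/MV decisions to
// rd_pick_best_sub8x8_mode().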
void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
                                    TileDataEnc *tile_data,
                                    MACROBLOCK *x,
                                    int mi_row, int mi_col,
                                    RD_COST *rd_cost,
                                    BLOCK_SIZE bsize,
                                    PICK_MODE_CONTEXT *ctx,
                                    int64_t best_rd_so_far) {
  VP10_COMMON *const cm = &cpi->common;
  RD_OPT *const rd_opt = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const struct segmentation *const seg = &cm->seg;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mbmi->segment_id;
  int comp_pred, i;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int64_t best_rd = best_rd_so_far;
  int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MB_MODE_INFO best_mbmode;
  int ref_index, best_ref_index = 0;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  INTERP_FILTER tmp_best_filter = SWITCHABLE;
  int rate_uv_intra, rate_uv_tokenonly;
  int64_t dist_uv;
  int skip_uv;
  PREDICTION_MODE mode_uv = DC_PRED;
  const int intra_cost_penalty = vp10_get_intra_cost_penalty(
      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
  int_mv seg_mvs[4][MAX_REF_FRAMES];
  b_mode_info best_bmodes[4];
  int best_skip2 = 0;
  int ref_frame_skip_mask[2] = { 0 };
  int64_t mask_filter = 0;
  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
  int internal_active_edge =
      vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
  memset(x->zcoeff_blk[TX_4X4], 0, 4);
  vp10_zero(best_mbmode);

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    filter_cache[i] = INT64_MAX;

  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < MAX_REF_FRAMES; j++)
      seg_mvs[i][j].as_int = INVALID_MV;
  }

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < REFERENCE_MODES; ++i)
    best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    best_filter_rd[i] = INT64_MAX;
  rate_uv_intra = INT_MAX;

  rd_cost->rate = INT_MAX;
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                         frame_mv[NEARESTMV], frame_mv[NEARMV],
                         yv12_mb);
    } else {
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable = 0;
    int this_skip2 = 0;
    int64_t total_sse = INT_MAX;
    int early_term = 0;
    ref_frame = vp10_ref_order[ref_index].ref_frame[0];
    second_ref_frame = vp10_ref_order[ref_index].ref_frame[1];
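    // ref_frame_skip_mask[0] is a bitmask over first reference frames to
    // skip; ref_frame_skip_mask[1] plays the same role for the second
    // reference (SECOND_REF_FRAME_MASK covers ALTREF_FRAME plus bit 0 for
    // NONE, i.e. single-reference prediction).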
    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
      if (ref_index == 3) {
        switch (best_mbmode.ref_frame[0]) {
          case INTRA_FRAME:
            break;
          case LAST_FRAME:
            ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
            ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
            break;
          case GOLDEN_FRAME:
            ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
            ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
            break;
          case ALTREF_FRAME:
            ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
            break;
          case NONE:
          case MAX_REF_FRAMES:
            assert(0 && "Invalid Reference frame");
            break;
        }
      }
    }

    if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
        (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
      continue;
    // Test best rd so far against threshold for trying this mode.
    if (!internal_active_edge &&
        rd_less_than_thresh(best_rd,
                            rd_opt->threshes[segment_id][bsize][ref_index],
                            tile_data->thresh_freq_fact[bsize][ref_index]))
      continue;

    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if (!cpi->allow_comp_inter_inter)
        continue;
      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
        continue;
      // Do not allow compound prediction if the segment level reference frame
      // feature is in use as in this case there can only be one reference.
      if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
        continue;

      if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          best_mbmode.ref_frame[0] == INTRA_FRAME)
        continue;
    }
    // TODO(jingning, jkoleszar): scaling reference frame not supported for
    // sub8x8 blocks.
    if (ref_frame > INTRA_FRAME &&
        vp10_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
      continue;

    if (second_ref_frame > INTRA_FRAME &&
        vp10_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
      continue;

    if (comp_pred)
      mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
    else if (ref_frame != INTRA_FRAME)
      mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
    // If the segment reference frame feature is enabled, do nothing if the
    // current ref frame is not allowed.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      continue;
    // Disable this drop out case if the ref frame
    // segment level feature is enabled for this segment. This is to
    // prevent the possibility that we end up unable to pick any mode.
    } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
      // unless ARNR filtering is enabled in which case we want
      // an unfiltered alternative. We allow near/nearest as well
      // because they may result in zero-zero MVs but be cheaper.
      if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
        continue;
    }
    mbmi->tx_size = TX_4X4;
    mbmi->uv_mode = DC_PRED;
    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;
    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
                                                          : cm->interp_filter;
    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred)
        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }
    if (ref_frame == INTRA_FRAME) {
      int rate;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
                                       &distortion_y, best_rd) >= best_rd)
        continue;
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      if (rate_uv_intra == INT_MAX) {
        choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4,
                             &rate_uv_intra,
                             &rate_uv_tokenonly,
                             &dist_uv, &skip_uv,
                             &mode_uv);
      }
      rate2 += rate_uv_intra;
      rate_uv = rate_uv_tokenonly;
      distortion2 += dist_uv;
      distortion_uv = dist_uv;
      mbmi->uv_mode = mode_uv;
    } else {
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref = comp_pred ?
          &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
      b_mode_info tmp_best_bmodes[16];
      MB_MODE_INFO tmp_best_mbmode;
      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
      int pred_exists = 0;
      int uv_skippable;

      this_rd_thresh = (ref_frame == LAST_FRAME) ?
          rd_opt->threshes[segment_id][bsize][THR_LAST] :
          rd_opt->threshes[segment_id][bsize][THR_ALTR];
      this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
          rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
        filter_cache[i] = INT64_MAX;
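      // Filter trial loop below: each switchable filter's rd cost is cached
      // in filter_cache[], and filter_cache[SWITCHABLE_FILTERS] keeps the
      // best cost with the filter-signaling rate folded in; these caches
      // feed the best_filter_rd bookkeeping at the bottom of the mode loop.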
      if (cm->interp_filter != BILINEAR) {
        tmp_best_filter = EIGHTTAP;
        if (x->source_variance < sf->disable_filter_search_var_thresh) {
          tmp_best_filter = EIGHTTAP;
        } else if (sf->adaptive_pred_interp_filter == 1 &&
                   ctx->pred_interp_filter < SWITCHABLE) {
          tmp_best_filter = ctx->pred_interp_filter;
        } else if (sf->adaptive_pred_interp_filter == 2) {
          tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
                                ctx->pred_interp_filter : 0;
        } else {
          for (switchable_filter_index = 0;
               switchable_filter_index < SWITCHABLE_FILTERS;
               ++switchable_filter_index) {
            int newbest, rs;
            int64_t rs_rd;
            MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
            mbmi->interp_filter = switchable_filter_index;
            tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
                                              &mbmi_ext->ref_mvs[ref_frame][0],
                                              second_ref, best_yrd, &rate,
                                              &rate_y, &distortion,
                                              &skippable, &total_sse,
                                              (int) this_rd_thresh, seg_mvs,
                                              bsi, switchable_filter_index,
                                              mi_row, mi_col);
            if (tmp_rd == INT64_MAX)
              continue;
            rs = vp10_get_switchable_rate(cpi, xd);
            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
            filter_cache[switchable_filter_index] = tmp_rd;
            filter_cache[SWITCHABLE_FILTERS] =
                VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
            if (cm->interp_filter == SWITCHABLE)
              tmp_rd += rs_rd;

            mask_filter = VPXMAX(mask_filter, tmp_rd);
            newbest = (tmp_rd < tmp_best_rd);
            if (newbest) {
              tmp_best_filter = mbmi->interp_filter;
              tmp_best_rd = tmp_rd;
            }
            if ((newbest && cm->interp_filter == SWITCHABLE) ||
                (mbmi->interp_filter == cm->interp_filter &&
                 cm->interp_filter != SWITCHABLE)) {
              tmp_best_rdu = tmp_rd;
              tmp_best_rate = rate;
              tmp_best_ratey = rate_y;
              tmp_best_distortion = distortion;
              tmp_best_sse = total_sse;
              tmp_best_skippable = skippable;
              tmp_best_mbmode = *mbmi;
              for (i = 0; i < 4; i++) {
                tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
                x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
              }
              pred_exists = 1;
              if (switchable_filter_index == 0 &&
                  sf->use_rd_breakout &&
                  best_rd < INT64_MAX) {
                if (tmp_best_rdu / 2 > best_rd) {
                  // skip searching the other filters if the first is
                  // already substantially larger than the best so far
                  tmp_best_filter = mbmi->interp_filter;
                  tmp_best_rdu = INT64_MAX;
                  break;
                }
              }
            }
          }  // switchable_filter_index loop
        }
      }
      if (tmp_best_rdu == INT64_MAX && pred_exists)
        continue;

      mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
                             tmp_best_filter : cm->interp_filter);
      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (bilinear, 6-tap) is indicated at the frame level
        tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
                                          &x->mbmi_ext->ref_mvs[ref_frame][0],
                                          second_ref, best_yrd, &rate, &rate_y,
                                          &distortion, &skippable, &total_sse,
                                          (int) this_rd_thresh, seg_mvs, bsi, 0,
                                          mi_row, mi_col);
        if (tmp_rd == INT64_MAX)
          continue;
      } else {
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mbmi = tmp_best_mbmode;
        for (i = 0; i < 4; i++)
          xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
      }

      rate2 += rate;
      distortion2 += distortion;
      if (cm->interp_filter == SWITCHABLE)
        rate2 += vp10_get_switchable_rate(cpi, xd);

      if (!mode_excluded)
        mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
                                  : cm->reference_mode == COMPOUND_REFERENCE;

      compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);

      tmp_best_rdu = best_rd -
          VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
                 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
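      // tmp_best_rdu is the rd budget left for chroma: the best rd so far
      // minus the cheaper of coding the residual as-is or skipping it
      // (taking total_sse as distortion). If nothing is left, the UV search
      // below is not worth running.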
      if (tmp_best_rdu > 0) {
        // If even the 'Y' rd value of split is higher than best so far
        // then don't bother looking at UV
        vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                         BLOCK_8X8);
        memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
        if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
                              &uv_sse, BLOCK_8X8, tmp_best_rdu))
          continue;

        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;
      }
    }
    if (cm->reference_mode == REFERENCE_MODE_SELECT)
      rate2 += compmode_cost;

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }
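    // Skip-flag costing: compare coding the residual (rate_y + rate_uv at
    // distortion2) against signaling skip and accepting the full source
    // error (total_sse). Whichever RDCOST is lower decides whether the
    // token rate is kept or dropped below.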
    if (!disable_skip) {
      // Skip is never coded at the segment level for sub8x8 blocks and instead
      // always coded in the bitstream at the mode info level.

      if (ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no skip flag.
          rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
        } else {
          // FIXME(rbultje) make this work for splitmv also
          rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else {
        // Add in the cost of the no skip flag.
        rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }
    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
    }
    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        int max_plane = MAX_MB_PLANE;
        // Note index of best mode so far
        best_ref_index = ref_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
          max_plane = 1;
        }
        rd_cost->rate = rate2;
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        best_yrd = best_rd -
                   RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        if (!x->select_tx_size)
          swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
        memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
               sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);

        for (i = 0; i < 4; i++)
          best_bmodes[i] = xd->mi[0]->bmi[i];
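        // Early-termination heuristic: once the winning distortion falls
        // below (roughly) one quantization step squared, later candidates
        // are unlikely to beat it, so the remaining reference combinations
        // can be abandoned.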
        // TODO(debargha): enhance this test with a better distortion prediction
        // based on qp, activity mask and history
        if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (ref_index > MIN_EARLY_TERM_INDEX)) {
          int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index
          int scale = 4;
#if CONFIG_VP9_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            qstep >>= (xd->bd - 8);
          }
#endif  // CONFIG_VP9_HIGHBITDEPTH
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }
    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
        best_pred_rd[SINGLE_REFERENCE] = single_rd;
      else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
        best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    }
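    // single_rd prices this mode as if the frame committed to one reference
    // scheme (no per-block compound flag); hybrid_rd includes the flag. The
    // best of each feeds the frame-level reference-mode decision via the
    // best_pred_diff values stored with the coding context.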
    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->interp_filter != BILINEAR) {
      int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
                                 SWITCHABLE_FILTERS : cm->interp_filter];
      int64_t adj_rd;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
        if (ref == INT64_MAX)
          adj_rd = 0;
        else if (filter_cache[i] == INT64_MAX)
          // When early termination is triggered, the encoder does not have
          // access to the rate-distortion cost. It only knows that the cost
          // should be above the maximum valid value. Hence it takes the known
          // maximum plus an arbitrary constant as the rate-distortion cost.
          adj_rd = mask_filter - ref + 10;
        else
          adj_rd = filter_cache[i] - ref;

        adj_rd += this_rd;
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
      }
    }
    if (early_term)
      break;

    if (x->skip && !comp_pred)
      break;
  }
  if (best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }
  // If we used an estimate for the uv intra rd in the loop above...
  if (sf->use_uv_intra_rd_estimate) {
    // Do Intra UV best rd mode selection if best mode choice above was intra.
    if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
      *mbmi = best_mbmode;
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
                              &rate_uv_tokenonly,
                              &dist_uv,
                              &skip_uv,
                              BLOCK_8X8, TX_4X4);
    }
  }
  if (best_rd == INT64_MAX) {
    rd_cost->rate = INT_MAX;
    rd_cost->dist = INT64_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                             sf->adaptive_rd_thresh, bsize, best_ref_index);
  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;
  if (!is_inter_block(&best_mbmode)) {
    for (i = 0; i < 4; i++)
      xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
  } else {
    for (i = 0; i < 4; ++i)
      memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
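    // The block-level mv fields mirror the last (bottom-right) sub-block so
    // that neighboring blocks read a consistent motion vector for above/left
    // MV prediction.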
    mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
  }
  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp10_zero(best_filter_diff);
  }
  store_coding_context(x, ctx, best_ref_index,
                       best_pred_diff, best_filter_diff, 0);
}