/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <limits.h>
#include <assert.h>

#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"

#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

#define I4X4_PRED 0x8000
#define SPLITMV 0x10000

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},

  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},

  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},

  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},

  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},

  {I4X4_PRED, INTRA_FRAME,  NONE},

  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},

  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
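
/* Worked example of the scaling above (illustrative, not from the original
 * source): the 8x8 baseline sits at factor 4 (x1.0), so a 16x16 block at
 * factor 8 uses 8/4 = x2.0 the baseline threshold and a 64x64 block at
 * factor 32 uses x8.0. The /4 is applied later, when the thresholds are
 * actually computed in vp9_initialize_rd_consts(). */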

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
                             vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs,
                            vp9_coef_tree);
#if CONFIG_BALANCED_COEFTREE
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
            vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree);
#else
            vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][l][DCT_EOB_TOKEN] ==
                   cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]);
#endif
          }
}

static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0, };

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
        (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}
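
/* Worked example (illustrative, not from the original source): if
 * vp9_convert_qindex_to_q(i) returned 100.0, then
 * sad_per_bit16lut[i] = (int)(0.0418 * 100.0 + 2.4107) = 6 and
 * sad_per_bit4lut[i]  = (int)(0.063  * 100.0 + 2.742)  = 9. */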

static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}
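
/* Worked example (illustrative, not from the original source): if
 * vp9_dc_quant(qindex, 0) returned 8, compute_rd_mult() would give
 * (11 * 8 * 8) >> 2 = 704 >> 2 = 176. */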

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i, bsize;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
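
  // Worked example (illustrative, not from the original source): with
  // RDMULT = 176, errorperbit = 176 >> 6 = 2; a small RDMULT such as 32
  // would give 0 and be bumped back up to 1 by the line above.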

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
      for (i = 0; i < MAX_MODES; ++i) {
        // Thresholds here seem unnecessarily harsh but are fine given the
        // actual range of values used for cpi->sf.thresh_mult[].
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        // *4 relates to the scaling of rd_thresh_block_size_factor[]
        if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / (4 * 100);
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
      }
    }
  } else {
    cpi->RDDIV = 100;

    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
      for (i = 0; i < MAX_MODES; i++) {
        // Thresholds here seem unnecessarily harsh but are fine given the
        // actual range of values used for cpi->sf.thresh_mult[].
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
      }
    }
  }
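
  // Worked example of the threshold calculation above (illustrative, not
  // from the original source): with thresh_mult[i] = 2000, q = 40 and an
  // 8x8 block (factor 4), the high-RDMULT branch gives
  // 2000 * 40 * 4 / (4 * 100) = 800; the low-RDMULT branch omits the /100
  // because RDMULT itself was not divided by 100 there.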

  fill_token_costs(cpi->mb.token_costs,
                   cpi->mb.token_costs_noskip,
                   cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /* rough estimate for costing */
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
            cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += (unsigned)this_diff * this_diff;
  }

  return error;
}

static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
                              int plane, int block, PLANE_TYPE type,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              int y_blocks) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
  int c = 0;
  int cost = 0, pad;
  const int *scan, *nb;
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs[tx_size][type][ref];
  ENTROPY_CONTEXT above_ec, left_ec;
  TX_TYPE tx_type = DCT_DCT;

  const int segment_id = xd->mode_info_context->mbmi.segment_id;
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];

  int seg_eob, default_eob;
  uint8_t token_cache[1024];
  const uint8_t *band_translate;

  // Check for consistency of tx_size with mode info
  assert((!type && !plane) || (type && plane));
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
    assert(tx_size == tx_size_uv);
  }

  switch (tx_size) {
    case TX_4X4: {
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_4x4(xd, block) : DCT_DCT;
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
      seg_eob = 16;
      scan = get_scan_4x4(tx_type);
      band_translate = vp9_coefband_trans_4x4;
      break;
    }
    case TX_8X8: {
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
          get_tx_type_8x8(xd) : DCT_DCT;
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
      scan = get_scan_8x8(tx_type);
      seg_eob = 64;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    }
    case TX_16X16: {
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
          get_tx_type_16x16(xd) : DCT_DCT;
      scan = get_scan_16x16(tx_type);
      seg_eob = 256;
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    }
    case TX_32X32:
      scan = vp9_default_scan_32x32;
      seg_eob = 1024;
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    default:
      abort();
      break;
  }

  assert(eob <= seg_eob);

  pt = combine_entropy_contexts(above_ec, left_ec);
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;

  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;

  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

  for (c = 0; c < eob; c++) {
    int v = qcoeff_ptr[scan[c]];
    int t = vp9_dct_value_tokens_ptr[v].token;
    int band = get_coef_band(band_translate, c);
    if (c)
      pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);

    if (!c || token_cache[scan[c - 1]])  // do not skip eob
      cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
    else
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
    token_cache[scan[c]] = vp9_pt_energy_class[t];
  }

  if (c < seg_eob) {
    if (c)
      pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
    cost += mb->token_costs_noskip[tx_size][type][ref]
                                  [get_coef_band(band_translate, c)]
                                  [pt][DCT_EOB_TOKEN];
  }

  // record in the entropy contexts whether any coefficients were coded
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
  }

  return cost;
}
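
/* Context derivation sketch (illustrative, not from the original source):
 * combine_entropy_contexts() folds the above/left non-zero flags into the
 * starting context pt, and within the coefficient loop
 * vp9_get_coef_context() re-derives pt for position c from the energy
 * classes cached in token_cache[] for already-coded neighbors, as given by
 * the scan-order neighbor table nb. */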

static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;

  const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
             (cm->txfm_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
        rd[TX_4X4][1] : rd[TX_8X8][1];
}

static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                               int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  return vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                         16 << (bwl + bhl)) >> shift;
}
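
/* Coefficient-count sketch (illustrative, not from the original source):
 * b_width_log2()/b_height_log2() are in 4-pel units, so an 8x8 block has
 * bwl = bhl = 1 and the call above covers 16 << (1 + 1) = 64 coefficients;
 * a 64x64 block covers 16 << (4 + 4) = 4096. */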

static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;
    sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                           16 << (bwl + bhl - subsampling));
  }
  return sum >> shift;
}

struct rdcost_block_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  TX_SIZE tx_size;
  int bw;
  int bh;
  int cost;
};

static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *args = arg;
  int x_idx, y_idx;
  MACROBLOCKD *const xd = &args->x->e_mbd;

  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx,
                           &y_idx);

  args->cost += cost_coeffs(args->cm, args->x, plane, block,
                            xd->plane[plane].plane_type, args->t_above + x_idx,
                            args->t_left + y_idx, args->tx_size,
                            args->bw * args->bh);
}

static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 };

  vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args);

  return args.cost;
}

static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int cost = 0, plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
  }
  return cost;
}

static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  xd->mode_info_context->mbmi.txfm_size = tx_size;

  if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
    vp9_encode_intra_block_y(cm, x, bsize);
  else
    vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
  *rate = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable = vp9_sby_is_skippable(xd, bsize);
}

static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int64_t *distortion,
                            int *skip, BLOCK_SIZE_TYPE bs,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB];
  int64_t d[TX_SIZE_MAX_SB];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

  assert(bs == mbmi->sb_type);
  if (mbmi->ref_frame[0] > INTRA_FRAME)
    vp9_subtract_sby(x, bs);

  if (cpi->sf.use_largest_txform) {
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
    vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                             mbmi->txfm_size);
    return;
  }
  if (bs >= BLOCK_SIZE_SB32X32)
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
  if (bs >= BLOCK_SIZE_MB16X16)
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                           skip, txfm_cache,
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
                               - (bs < BLOCK_SIZE_MB16X16));
}
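
/* Max-size mapping sketch (illustrative, not from the original source): the
 * last argument above caps the transform-size search. E.g.
 * bs = BLOCK_SIZE_SB64X64 gives TX_32X32 - 0 - 0 = TX_32X32,
 * bs = BLOCK_SIZE_MB16X16 gives TX_32X32 - 1 - 0 = TX_16X16, and
 * bs = BLOCK_SIZE_SB8X8 gives TX_32X32 - 1 - 1 = TX_8X8. */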

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = INT64_MAX;
  int rate = 0;
  int64_t distortion;
  VP9_COMMON *const cm = &cpi->common;
  const int src_stride = x->plane[0].src.stride;
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    rate = bmode_costs[mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
        vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8),
                                TX_4X4, mode, dst, xd->plane[0].dst.stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }

        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                          block, 16), 16) >> 2;

        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
    }
  }

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4,
                              *best_mode, dst, xd->plane[0].dst.stride);

      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
  }

  return best_rd;
}

static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
                                         int64_t *Distortion, int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
  int cost = 0;
  int64_t distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;
  MODE_INFO *const mic = xd->mode_info_context;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      const int mis = xd->mode_info_stride;
      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int64_t UNINITIALIZED_IS_SAFE(d);

      i = idy * 2 + idx;
      if (xd->frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

        bmode_costs = mb->y_mode_costs[A][L];
      }

      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                        t_above + idx, t_left + idy,
                                        &r, &ry, &d, bsize);
      cost += r;
      distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode.first = best_mode;
      for (j = 1; j < bh; ++j)
        mic->bmi[i + j * 2].as_mode.first = best_mode;
      for (j = 1; j < bw; ++j)
        mic->bmi[i + j].as_mode.first = best_mode;

      if (total_rd >= best_rd)
        break;
    }
  }

  if (total_rd >= best_rd)
    return INT64_MAX;

  *Rate = cost;
  *rate_y = tot_rate_y;
  *Distortion = distortion;
  xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;

  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
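
/* Sub-8x8 iteration sketch (illustrative, not from the original source):
 * for a 4x8 partition, bw = 1 and bh = 2, so the idy loop runs once and the
 * idx loop visits both 4-wide columns; the chosen mode is then replicated
 * into all covered bmi[] entries by the j loops above. */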

static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE_TYPE bsize,
                                      int64_t txfm_cache[NB_TXFM_MODES]) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  MACROBLOCKD *const xd = &x->e_mbd;
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion;
  int64_t best_rd = INT64_MAX, this_rd;
  TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (bsize < BLOCK_SIZE_SB8X8) {
    x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
    return best_rd;
  }

  for (i = 0; i < NB_TXFM_MODES; i++)
    txfm_cache[i] = INT64_MAX;

  /* Y Search for 32x32 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_txfm_cache[NB_TXFM_MODES];
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
      const MB_PREDICTION_MODE L = xd->left_available ?
                                   left_block_mode(mic, 0) : DC_PRED;

      bmode_costs = x->y_mode_costs[A][L];
    }
    x->e_mbd.mode_info_context->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
                    bsize, local_txfm_cache);

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    for (i = 0; i < NB_TXFM_MODES; i++) {
      int64_t adj_rd = this_rd + local_txfm_cache[i] -
                       local_txfm_cache[cpi->common.txfm_mode];
      if (adj_rd < txfm_cache[i]) {
        txfm_cache[i] = adj_rd;
      }
    }
  }

  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
  x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;

  return best_rd;
}

static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                      int *rate, int64_t *distortion,
                                      int *skippable, BLOCK_SIZE_TYPE bsize,
                                      TX_SIZE uv_tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
    vp9_encode_intra_block_uv(cm, x, bsize);
  else
    vp9_xform_quant_sbuv(cm, x, bsize);

  *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2);
  *rate = rdcost_uv(cm, x, bsize, uv_tx_size);
  *skippable = vp9_sbuv_is_skippable(xd, bsize);
}

static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

  if (mbmi->ref_frame[0] > INTRA_FRAME)
    vp9_subtract_sbuv(x, bsize);

  if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) {
    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
                              TX_32X32);
  } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) {
    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
                              TX_16X16);
  } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) {
    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
                              TX_8X8);
  } else {
    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
                              TX_4X4);
  }
}
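
/* Chroma size sketch (illustrative, not from the original source): with
 * 4:2:0 subsampling the chroma planes are half-width/half-height, so a
 * 64x64 block coded with TX_32X32 luma transforms keeps TX_32X32 for chroma
 * only at 64x64; a 32x32 block with TX_32X32 luma drops to TX_16X16 chroma,
 * per the bsize checks above. */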

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion;

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
    super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
                     &this_distortion, &s, bsize);
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;

  return best_rd;
}

int vp9_cost_mv_ref(VP9_COMP *cpi,
                    MB_PREDICTION_MODE m,
                    const int mode_context) {
  MACROBLOCKD *xd = &cpi->mb.e_mbd;
  int segment_id = xd->mode_info_context->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
    VP9_COMMON *pc = &cpi->common;
    assert(NEARESTMV <= m && m <= NEWMV);
    return cost_token(vp9_sb_mv_ref_tree,
                      pc->fc.inter_mode_probs[mode_context],
                      vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
  } else {
    return 0;
  }
}

void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  x->e_mbd.mode_info_context->mbmi.mode = mb;
  x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE_TYPE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);

static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE_TYPE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv);

static int labels2mode(MACROBLOCK *x, int i,
                       MB_PREDICTION_MODE this_mode,
                       int_mv *this_mv, int_mv *this_second_mv,
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv,
                       int_mv *second_best_ref_mv,
                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mode_info_context;
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int cost = 0, thismvcost = 0;
  int idx, idy;
  int bw = 1 << b_width_log2(mbmi->sb_type);
  int bh = 1 << b_height_log2(mbmi->sb_type);

  /* We have to be careful retrieving previously-encoded motion vectors.
     Ones from this macroblock have to be pulled from the BLOCKD array
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
  MB_PREDICTION_MODE m;

  // the only time we should do costing for new motion vector or mode
  // is when we are on a new label (jbb May 08, 2007)
  switch (m = this_mode) {
    case NEWMV:
      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
                                   102, xd->allow_high_precision_mv);
      if (mbmi->ref_frame[1] > 0) {
        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
                                      mvjcost, mvcost, 102,
                                      xd->allow_high_precision_mv);
      }
      break;
    case NEARESTMV:
      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int =
            frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int =
            frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv->as_int = 0;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int = 0;
      break;
    default:
      break;
  }

  cost = vp9_cost_mv_ref(cpi, this_mode,
                         mbmi->mb_mode_context[mbmi->ref_frame[0]]);

  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
  if (mbmi->ref_frame[1] > 0)
    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;

  x->partition_info->bmi[i].mode = m;
  x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
  if (mbmi->ref_frame[1] > 0)
    x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));
      vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
                 &x->partition_info->bmi[i],
                 sizeof(x->partition_info->bmi[i]));
    }
  }

  cost += thismvcost;
  return cost;
}

static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
                                       MACROBLOCK *x,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl) {
  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bwl = b_width_log2(bsize), bw = 1 << bwl;
  int bhl = b_height_log2(bsize), bh = 1 << bhl;
  int idx, idy, k;
  const int src_stride = x->plane[0].src.stride;
  uint8_t* const src =
      raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
                                x->plane[0].src.buf, src_stride);
  int16_t* src_diff =
      raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
                                x->plane[0].src_diff);
  int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
  uint8_t* const pre =
      raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
                                xd->plane[0].pre[0].buf,
                                xd->plane[0].pre[0].stride);
  uint8_t* const dst =
      raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
                                xd->plane[0].dst.buf,
                                xd->plane[0].dst.stride);
  int64_t thisdistortion = 0;
  int thisrate = 0;

  *labelyrate = 0;
  *distortion = 0;

  vp9_build_inter_predictor(pre,
                            xd->plane[0].pre[0].stride,
                            dst,
                            xd->plane[0].dst.stride,
                            &xd->mode_info_context->bmi[i].as_mv[0],
                            &xd->scale_factor[0],
                            4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix,
                            MV_PRECISION_Q3);

  // TODO(debargha): Make this work properly with the
  // implicit-compoundinter-weight experiment when implicit
  // weighting for splitmv modes is turned on.
  if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
    uint8_t* const second_pre =
        raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
                                  xd->plane[0].pre[1].buf,
                                  xd->plane[0].pre[1].stride);
    vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
                              dst, xd->plane[0].dst.stride,
                              &xd->mode_info_context->bmi[i].as_mv[1],
                              &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
                              &xd->subpix, MV_PRECISION_Q3);
  }

  vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
                     src, src_stride,
                     dst, xd->plane[0].dst.stride);

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      k = i + idy * 2 + idx;
      src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
                                           x->plane[0].src_diff);
      coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
      x->fwd_txm4x4(src_diff, coeff, 16);
      x->quantize_b_4x4(x, k, DCT_DCT, 16);
      thisdistortion += vp9_block_error(coeff,
                                        BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                     k, 16), 16);
      thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
                              ta + (k & 1),
                              tl + (k >> 1), TX_4X4, 16);
    }
  }

  *distortion += thisdistortion;
  *labelyrate += thisrate;

  *distortion >>= 2;
  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int_mv *ref_mv, *second_ref_mv;
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  int_mv mvs[4], second_mvs[4];
  int eobs[4];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
  int r = 0;
  r |= (mv->as_mv.row >> 3) < x->mv_row_min;
  r |= (mv->as_mv.row >> 3) > x->mv_row_max;
  r |= (mv->as_mv.col >> 3) < x->mv_col_min;
  r |= (mv->as_mv.col >> 3) > x->mv_col_max;
  return r;
}
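
/* Precision sketch (illustrative, not from the original source): motion
 * vectors are stored in 1/8-pel units, so the >> 3 above converts to full
 * pels before comparing against the search bounds; e.g. a row component of
 * 36 is 36 >> 3 = 4 full pels. */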

static enum BlockSize get_block_size(int bw, int bh) {
  if (bw == 4 && bh == 4)
    return BLOCK_4X4;

  if (bw == 4 && bh == 8)
    return BLOCK_4X8;

  if (bw == 8 && bh == 4)
    return BLOCK_8X4;

  if (bw == 8 && bh == 8)
    return BLOCK_8X8;

  if (bw == 8 && bh == 16)
    return BLOCK_8X16;

  if (bw == 16 && bh == 8)
    return BLOCK_16X8;

  if (bw == 16 && bh == 16)
    return BLOCK_16X16;

  if (bw == 32 && bh == 32)
    return BLOCK_32X32;

  if (bw == 32 && bh == 16)
    return BLOCK_32X16;

  if (bw == 16 && bh == 32)
    return BLOCK_16X32;

  if (bw == 64 && bh == 32)
    return BLOCK_64X32;

  if (bw == 32 && bh == 64)
    return BLOCK_32X64;

  if (bw == 64 && bh == 64)
    return BLOCK_64X64;

  assert(0);
  return -1;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
  x->plane[0].src.buf =
      raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
                                x->plane[0].src.buf,
                                x->plane[0].src.stride);
  assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
  x->e_mbd.plane[0].pre[0].buf =
      raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
                                x->e_mbd.plane[0].pre[0].buf,
                                x->e_mbd.plane[0].pre[0].stride);
  if (mbmi->ref_frame[1])
    x->e_mbd.plane[0].pre[1].buf =
        raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
                                  x->e_mbd.plane[0].pre[1].buf,
                                  x->e_mbd.plane[0].pre[1].stride);
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (mbmi->ref_frame[1])
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    BEST_SEG_INFO *bsi,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int i, j, br = 0, rate = 0, sbr = 0, idx, idy;
  int64_t bd = 0, sbd = 0;
  MB_PREDICTION_MODE this_mode;
  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
  const int label_count = 4;
  int64_t this_segment_rd = 0, other_segment_rd;
  int label_mv_thresh;
  int segmentyrate = 0;
  int best_eobs[4] = { 0 };
  BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
  int bwl = b_width_log2(bsize), bw = 1 << bwl;
  int bhl = b_height_log2(bsize), bh = 1 << bhl;
  vp9_variance_fn_ptr_t *v_fn_ptr;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  ENTROPY_CONTEXT t_above_b[4], t_left_b[4];

  vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));

  v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];

  // 64 would make this threshold effectively very large, so that we very
  // rarely check mvs on segments; setting this to 1 makes the mv threshold
  // roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  other_segment_rd = this_segment_rd;

  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
      MB_PREDICTION_MODE mode_selected = ZEROMV;
      int bestlabelyrate = 0;
      i = idy * 2 + idx;

      frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
      frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
      vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
                                    &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
                                    &frame_mv[NEARMV][mbmi->ref_frame[0]],
                                    i);
      if (mbmi->ref_frame[1] > 0)
        vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
                                      &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
                                      &frame_mv[NEARMV][mbmi->ref_frame[1]],
                                      i);

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        int64_t this_rd;
        int64_t distortion;
        int labelyrate;
        ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));

        vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
        vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));

        // motion search for newmv (single predictor case only)
        if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV) {
          int step_param = 0;
          int further_steps;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          int_mv mvp_full;

          /* Is the best so far sufficiently good that we can't justify
           * doing a new motion search. */
          if (best_label_rd < label_mv_thresh)
            break;

          if (cpi->compressor_speed) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int =
                  x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int =
                    x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
              step_param = 2;
            }
          }

          further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;

          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;

          // adjust src pointer for this block
          mi_buf_shift(x, i);
          bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                           sadpb, further_steps, 0, v_fn_ptr,
                                           bsi->ref_mv, &mode_mv[NEWMV]);

          // Should we do a full search (best quality only)
          if (cpi->compressor_speed == 0) {
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);

            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, v_fn_ptr,
                                           x->nmvjointcost, x->mvcost,
                                           bsi->ref_mv, i);

            if (thissme < bestsme) {
              bestsme = thissme;
              mode_mv[NEWMV].as_int =
                  x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
            } else {
              /* The full search result is actually worse so re-instate the
               * previous best vector */
              x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
                  mode_mv[NEWMV].as_int;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            unsigned int sse;
            cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
                                         bsi->ref_mv, x->errorperbit, v_fn_ptr,
                                         x->nmvjointcost, x->mvcost,
                                         &distortion, &sse);

            // save motion search result for use in compound prediction
            seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
          }

          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        } else if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV) {
          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
            continue;

          // adjust src pointers
          mi_buf_shift(x, i);
          if (cpi->sf.comp_inter_joint_search_thresh < bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
                                mi_row, mi_col, seg_mvs[i],
                                &rate_mv);
            seg_mvs[i][mbmi->ref_frame[0]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
            seg_mvs[i][mbmi->ref_frame[1]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
          }
          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        rate = labels2mode(x, i, this_mode, &mode_mv[this_mode],
                           &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
                           bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
                           x->mvcost, cpi);

        // Trap vectors that reach beyond the UMV borders
        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
          continue;
        }
        if (mbmi->ref_frame[1] > 0 &&
            mv_check_bounds(x, &second_mode_mv[this_mode]))
          continue;

        this_rd = encode_inter_mb_segment(&cpi->common,
                                          x, i, &labelyrate,
                                          &distortion, t_above_s, t_left_s);
        this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);

        if (this_rd < best_label_rd) {
          sbr = rate;
          sbd = distortion;
          bestlabelyrate = labelyrate;
          mode_selected = this_mode;
          best_label_rd = this_rd;
          best_eobs[i] = x->e_mbd.plane[0].eobs[i];
          vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
          vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
        }
      } /* for each 4x4 mode */

      vpx_memcpy(t_above, t_above_b, sizeof(t_above));
      vpx_memcpy(t_left, t_left_b, sizeof(t_left));

      labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
                  &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
                  bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
                  x->mvcost, cpi);

      br += sbr;
      bd += sbd;
      segmentyrate += bestlabelyrate;
      this_segment_rd += best_label_rd;
      other_segment_rd += best_other_rd;

      for (j = 1; j < bh; ++j)
        vpx_memcpy(&x->partition_info->bmi[i + j * 2],
                   &x->partition_info->bmi[i],
                   sizeof(x->partition_info->bmi[i]));
      for (j = 1; j < bw; ++j)
        vpx_memcpy(&x->partition_info->bmi[i + j],
                   &x->partition_info->bmi[i],
                   sizeof(x->partition_info->bmi[i]));
    }
  } /* for each label */

  if (this_segment_rd < bsi->segment_rd) {
    bsi->r = br;
    bsi->d = bd;
    bsi->segment_yrate = segmentyrate;
    bsi->segment_rd = this_segment_rd;

    // store everything needed to come back to this!!
    for (i = 0; i < 4; i++) {
      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
      if (mbmi->ref_frame[1] > 0)
        bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
      bsi->modes[i] = x->partition_info->bmi[i].mode;
      bsi->eobs[i] = best_eobs[i];
    }
  }
}

static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
                                       int_mv *best_ref_mv,
                                       int_mv *second_best_ref_mv,
                                       int64_t best_rd,
                                       int *returntotrate,
                                       int *returnyrate,
                                       int64_t *returndistortion,
                                       int *skippable, int mvthresh,
                                       int_mv seg_mvs[4][MAX_REF_FRAMES],
                                       int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO bsi;
  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;

  vpx_memset(&bsi, 0, sizeof(bsi));

  bsi.segment_rd = best_rd;
  bsi.ref_mv = best_ref_mv;
  bsi.second_ref_mv = second_best_ref_mv;
  bsi.mvp.as_int = best_ref_mv->as_int;
  bsi.mvthresh = mvthresh;

  for (i = 0; i < 4; i++)
    bsi.modes[i] = ZEROMV;

  rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col);

  /* set it to the best */
  for (i = 0; i < 4; i++) {
    x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int;
    if (mbmi->ref_frame[1] > 0)
      x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int =
          bsi.second_mvs[i].as_int;
    x->e_mbd.plane[0].eobs[i] = bsi.eobs[i];
  }

  /* save partitions */
  x->partition_info->count = 4;

  for (i = 0; i < x->partition_info->count; i++) {
    x->partition_info->bmi[i].mode = bsi.modes[i];
    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv;
    if (mbmi->ref_frame[1] > 0)
      x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv;
  }

  /*
   * used to set mbmi->mv.as_int
   */
  x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int;
  if (mbmi->ref_frame[1] > 0)
    x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int;

  *returntotrate = bsi.r;
  *returndistortion = bsi.d;
  *returnyrate = bsi.segment_yrate;
  *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
  mbmi->mode = bsi.modes[3];

  return (int)(bsi.segment_rd);
}

static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                    uint8_t *ref_y_buffer, int ref_y_stride,
                    int ref_frame, enum BlockSize block_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int_mv this_mv;
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;

  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  int row_offset, col_offset;

  // Get the sad for each candidate reference mv
  for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
    this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;

    // The list is at an end if we see 0 for a second time.
    if (!this_mv.as_int && zero_seen)
      break;

    zero_seen = zero_seen || !this_mv.as_int;

    row_offset = this_mv.as_mv.row >> 3;
    col_offset = this_mv.as_mv.col >> 3;
    ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;

    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride,
                                           0x7fffffff);

    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
}

static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     vp9_prob *comp_mode_p) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  int seg_ref_active = vp9_segfeature_active(xd, segment_id,
                                             SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
    vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
    *comp_mode_p = 128;
  } else {
    vp9_prob intra_inter_p = vp9_get_pred_prob(cm, xd, PRED_INTRA_INTER);
    vp9_prob comp_inter_p = 128;

    if (cm->comp_pred_mode == HYBRID_PREDICTION) {
      comp_inter_p = vp9_get_pred_prob(cm, xd, PRED_COMP_INTER_INTER);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);

    if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
      vp9_prob ref_single_p1 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P1);
      vp9_prob ref_single_p2 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P2);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->comp_pred_mode == HYBRID_PREDICTION)
        base_cost += vp9_cost_bit(comp_inter_p, 0);

      ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
          ref_costs_single[ALTREF_FRAME] = base_cost;
      ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
    } else {
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }

    if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
      vp9_prob ref_comp_p = vp9_get_pred_prob(cm, xd, PRED_COMP_REF_P);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->comp_pred_mode == HYBRID_PREDICTION)
        base_cost += vp9_cost_bit(comp_inter_p, 1);

      ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
    } else {
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
    }
  }
}

static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                                 int mode_index,
                                 PARTITION_INFO *partition,
                                 int_mv *ref_mv,
                                 int_mv *second_ref_mv,
                                 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
                                 int64_t txfm_size_diff[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->skip = x->skip;
  ctx->best_mode_index = mode_index;
  ctx->mic = *xd->mode_info_context;

  if (partition)
    ctx->partition_info = *partition;

  ctx->best_ref_mv.as_int = ref_mv->as_int;
  ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;

  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];

  memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
}

static void setup_pred_block(const MACROBLOCKD *xd,
                             struct buf_2d dst[MAX_MB_PLANE],
                             const YV12_BUFFER_CONFIG *src,
                             int mi_row, int mi_col,
                             const struct scale_factors *scale,
                             const struct scale_factors *scale_uv) {
  int i;

  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;
#if CONFIG_ALPHA
  dst[3].buf = src->alpha_buffer;
  dst[3].stride = src->alpha_stride;
#endif

  // TODO(jkoleszar): Make scale factors per-plane data
  for (i = 0; i < MAX_MB_PLANE; i++) {
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale,
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
  }
}

static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                               int idx, MV_REFERENCE_FRAME frame_type,
                               enum BlockSize block_size,
                               int mi_row, int mi_col,
                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
                               int_mv frame_near_mv[MAX_REF_FRAMES],
                               struct buf_2d yv12_mb[4][MAX_MB_PLANE],
                               struct scale_factors scale[MAX_REF_FRAMES]) {
  VP9_COMMON *cm = &cpi->common;
  YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

  // set up scaling factors
  scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];

  scale[frame_type].x_offset_q4 =
      ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
                         VP9_REF_SCALE_SHIFT) & 0xf;
  scale[frame_type].y_offset_q4 =
      ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
                         VP9_REF_SCALE_SHIFT) & 0xf;

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
                   &scale[frame_type], &scale[frame_type]);

  // Gets an initial list of candidate vectors from neighbours and orders them
  vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
                   xd->prev_mode_info_context,
                   frame_type,
                   mbmi->ref_mvs[frame_type],
                   cpi->common.ref_frame_sign_bias);

  // Candidate refinement carried out at encoder and decoder
  vp9_find_best_ref_mvs(xd,
                        mbmi->ref_mvs[frame_type],
                        &frame_nearest_mv[frame_type],
                        &frame_near_mv[frame_type]);

  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
  if (scale[frame_type].x_scale_fp == (1 << VP9_REF_SCALE_SHIFT) &&
      scale[frame_type].y_scale_fp == (1 << VP9_REF_SCALE_SHIFT))
    mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
            frame_type, block_size);
}

static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
  YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
  int fb = get_ref_frame_idx(cpi, ref_frame);
  if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb])
    scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]];
  return scaled_ref_frame;
}

static double linear_interpolate(double x, int ntab, double step,
                                 const double *tab) {
  double y = x / step;
  int d = (int)y;
  double a = y - d;
  if (d >= ntab - 1)
    return tab[ntab - 1];
  else
    return tab[d] * (1 - a) + tab[d + 1] * a;
}
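
/* Worked example (illustrative, not from the original source): with
 * step = 0.25 and x = 0.30, y = 1.2, d = 1 and a = 0.2, so the result is
 * tab[1] * 0.8 + tab[2] * 0.2. */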

static double model_rate_norm(double x) {
  // Normalized rate
  // This function models the rate for a Laplacian source with given
  // variance when quantized with a uniform quantizer with given stepsize.
  // The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  static const double rate_tab_step = 0.125;
  static const double rate_tab[] = {
    256.0000, 4.944453, 3.949276, 3.371593,
    2.965771, 2.654550, 2.403348, 2.193612,
    2.014208, 1.857921, 1.719813, 1.596364,
    1.484979, 1.383702, 1.291025, 1.205767,
    1.126990, 1.053937, 0.985991, 0.922644,
    0.863472, 0.808114, 0.756265, 0.707661,
    0.662070, 0.619287, 0.579129, 0.541431,
    0.506043, 0.472828, 0.441656, 0.412411,
    0.384980, 0.359260, 0.335152, 0.312563,
    0.291407, 0.271600, 0.253064, 0.235723,
    0.219508, 0.204351, 0.190189, 0.176961,
    0.164611, 0.153083, 0.142329, 0.132298,
    0.122945, 0.114228, 0.106106, 0.098541,
    0.091496, 0.084937, 0.078833, 0.073154,
    0.067872, 0.062959, 0.058392, 0.054147,
    0.050202, 0.046537, 0.043133, 0.039971,
    0.037036, 0.034312, 0.031783, 0.029436,
    0.027259, 0.025240, 0.023367, 0.021631,
    0.020021, 0.018528, 0.017145, 0.015863,
    0.014676, 0.013575, 0.012556, 0.011612,
    0.010738, 0.009929, 0.009180, 0.008487,
    0.007845, 0.007251, 0.006701, 0.006193,
    0.005722, 0.005287, 0.004884, 0.004512,
    0.004168, 0.003850, 0.003556, 0.003284,
    0.003032, 0.002800, 0.002585, 0.002386,
    0.002203, 0.002034, 0.001877, 0.001732,
    0.001599, 0.001476, 0.001362, 0.001256,
    0.001159, 0.001069, 0.000987, 0.000910,
    0.000840, 0.000774, 0.000714, 0.000659,
    0.000608, 0.000560, 0.000517, 0.000476,
    0.000439, 0.000405, 0.000373, 0.000344,
    0.000317, 0.000292, 0.000270, 0.000248,
    0.000229, 0.000211, 0.000195, 0.000179,
    0.000165, 0.000152, 0.000140, 0.000129,
    0.000119, 0.000110, 0.000101, 0.000093,
    0.000086, 0.000079, 0.000073, 0.000067,
    0.000062, 0.000057, 0.000052, 0.000048,
    0.000044, 0.000041, 0.000038, 0.000035,
    0.000032, 0.000029, 0.000027, 0.000025,
    0.000023, 0.000021, 0.000019, 0.000018,
    0.000016, 0.000015, 0.000014, 0.000013,
    0.000012, 0.000011, 0.000010, 0.000009,
    0.000008, 0.000008, 0.000007, 0.000007,
    0.000006, 0.000006, 0.000005, 0.000005,
    0.000004, 0.000004, 0.000004, 0.000003,
    0.000003, 0.000003, 0.000003, 0.000002,
    0.000002, 0.000002, 0.000002, 0.000002,
    0.000002, 0.000001, 0.000001, 0.000001,
    0.000001, 0.000001, 0.000001, 0.000001,
    0.000001, 0.000001, 0.000001, 0.000001,
    0.000001, 0.000001, 0.000000, 0.000000,
  };
  const int rate_tab_num = sizeof(rate_tab) / sizeof(rate_tab[0]);

  return linear_interpolate(x, rate_tab_num, rate_tab_step, rate_tab);
}

static double model_dist_norm(double x) {
  // Normalized distortion
  // This function models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab_step = 0.25;
  static const double dist_tab[] = {
    0.000000, 0.005189, 0.020533, 0.045381,
    0.078716, 0.119246, 0.165508, 0.215979,
    0.269166, 0.323686, 0.378318, 0.432034,
    0.484006, 0.533607, 0.580389, 0.624063,
    0.664475, 0.701581, 0.735418, 0.766092,
    0.793751, 0.818575, 0.840761, 0.860515,
    0.878045, 0.893554, 0.907238, 0.919281,
    0.929857, 0.939124, 0.947229, 0.954306,
    0.960475, 0.965845, 0.970512, 0.974563,
    0.978076, 0.981118, 0.983750, 0.986024,
    0.987989, 0.989683, 0.991144, 0.992402,
    0.993485, 0.994417, 0.995218, 0.995905,
    0.996496, 0.997002, 0.997437, 0.997809,
    0.998128, 0.998401, 0.998635, 0.998835,
    0.999006, 0.999152, 0.999277, 0.999384,
    0.999475, 0.999553, 0.999619, 0.999676,
    0.999724, 0.999765, 0.999800, 0.999830,
    0.999855, 0.999877, 0.999895, 0.999911,
    0.999924, 0.999936, 0.999945, 0.999954,
    0.999961, 0.999967, 0.999972, 0.999976,
    0.999980, 0.999983, 0.999985, 0.999988,
    0.999989, 0.999991, 0.999992, 0.999994,
    0.999995, 0.999995, 0.999996, 0.999997,
    0.999997, 0.999998, 0.999998, 0.999998,
    0.999999, 0.999999, 0.999999, 0.999999,
    0.999999, 0.999999, 0.999999, 1.000000,
  };
  const int dist_tab_num = sizeof(dist_tab)/sizeof(dist_tab[0]);

  return linear_interpolate(x, dist_tab_num, dist_tab_step, dist_tab);
}
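
// Cross-check (illustrative sketch, not part of the encoder; assumes
// <math.h> is available, as it already is for sqrt() below): the table
// above samples the closed form directly, i.e.
//
//   static double dist_norm_closed_form(double x) {
//     return x > 0 ? 1 - x / (sqrt(2) * sinh(x / sqrt(2))) : 0;
//   }
//
// For x = 1.0 this gives ~0.0787, matching dist_tab[4] (x = 4 * 0.25).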
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance)
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;
    double x = qstep / sqrt(s2);
    // TODO(debargha): Make the modeling functions take (qstep^2 / s2)
    // as argument rather than qstep / sqrt(s2) to obviate the need for
    // the sqrt() operation.
    D = model_dist_norm(x);
    R = model_rate_norm(x);
    *rate = (n * R * 256 + 0.5);
    *dist = (n * D * s2 + 0.5);
  }
  vp9_clear_system_state();
}
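
// Worked example (illustrative, not from the original source): a 4x4
// block with var = 400 and qstep = 8 gives s2 = 400 / 16 = 25 and
// x = 8 / sqrt(25) = 1.6. The tables yield R ~= 1.404 and D ~= 0.186,
// so *rate ~= 16 * 1.404 * 256 ~= 5751 (on the scale where 256 = one
// bit, as used by RDCOST) and *dist ~= 16 * 0.186 * 25 ~= 74 (the same
// scale as var).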
static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
                                           struct macroblockd_plane *pd) {
  return get_block_size(plane_block_width(bsize, pd),
                        plane_block_height(bsize, pd));
}
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0;
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];

    // TODO(dkovalev) the same code in get_plane_block_size
    const int bw = plane_block_width(bsize, pd);
    const int bh = plane_block_height(bsize, pd);
    const enum BlockSize bs = get_block_size(bw, bh);
    unsigned int sse;
    int rate;
    int64_t dist;

    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                       pd->dst.buf, pd->dst.stride, &sse);

    model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}
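
// Example (illustrative): an AC dequant value of 40 in the 8x-scaled
// coefficient domain is passed to the model as qstep = 40 >> 3 = 5,
// i.e. an effective pixel-domain step of 5, per the scaling note at
// the top of this function.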
static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

  const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
  const int m = vp9_switchable_interp_map[mbmi->interp_filter];
  return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
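
// The cost looked up here is conditioned on the switchable-filter
// prediction context c and the filter index m; the rate-factor macro
// exists so the weight of this signalling cost in the RD decisions
// below can be tuned in one place.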
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE_TYPE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
  int bestsme = INT_MAX;
  int further_steps, step_param;
  int sadpb = x->sadperbit16;
  int_mv mvp_full;
  int ref = mbmi->ref_frame[0];
  int_mv ref_mv = mbmi->ref_mvs[ref][0];
  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);

  int tmp_col_min = x->mv_col_min;
  int tmp_col_max = x->mv_col_max;
  int tmp_row_min = x->mv_row_min;
  int tmp_row_max = x->mv_row_max;

  YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];

    setup_pre_planes(xd, scaled_ref_frame, NULL, mi_row, mi_col,

  }

  vp9_clamp_mv_min_max(x, &ref_mv);

  step_param = vp9_init_search_range(
                 cpi, MIN(cpi->common.width, cpi->common.height));

  // mvp_full.as_int = ref_mv[0].as_int;
  mvp_full.as_int =
      mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;

  mvp_full.as_mv.col >>= 3;
  mvp_full.as_mv.row >>= 3;
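  // (On the >> 3 above: MVs are stored in 1/8-pel units, while the
  // full-pixel diamond search below works in whole pels, so the
  // predictor is scaled down to integer-pel first.)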
  // Further step/diamond searches as necessary
  further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;

  bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                   sadpb, further_steps, 1,
                                   &cpi->fn_ptr[block_size],

  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  if (bestsme < INT_MAX) {
    int dis;  /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,

                                 &cpi->fn_ptr[block_size],
                                 x->nmvjointcost, x->mvcost,
  }

  *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
                             x->nmvjointcost, x->mvcost,
                             96, xd->allow_high_precision_mv);
  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
}
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE_TYPE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int_mv ref_mv[2];
  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
  int ite;
  // Prediction buffer from second frame.
  uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
  struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
  struct buf_2d scaled_first_yv12;
  int last_besterr[2] = {INT_MAX, INT_MAX};
  YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
  scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
  scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]);

  ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
  ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
  if (scaled_ref_frame[0]) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];
    setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,

  }

  if (scaled_ref_frame[1]) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_second_yv12[i] = xd->plane[i].pre[1];

    setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,

  }

  xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],

  xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],

  scaled_first_yv12 = xd->plane[0].pre[0];

  // Initialize mv using single prediction mode result.
  frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
  frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

  // Allow the joint search to run multiple times per ref frame, and break
  // out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    int_mv tmp_mv;
    int search_range = 3;

    int tmp_col_min = x->mv_col_min;
    int tmp_col_max = x->mv_col_max;
    int tmp_row_min = x->mv_row_min;
    int tmp_row_max = x->mv_row_max;
    int id = ite % 2;

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get pred block from second frame.
    vp9_build_inter_predictor(ref_yv12[!id].buf,
                              ref_yv12[!id].stride,

                              &frame_mv[refs[!id]],
                              &xd->scale_factor[!id],

                              &xd->subpix, MV_PRECISION_Q3);
    // Compound motion search on first ref frame.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[id];
    vp9_clamp_mv_min_max(x, &ref_mv[id]);

    // Use mv result from single mode as mvp.
    tmp_mv.as_int = frame_mv[refs[id]].as_int;

    tmp_mv.as_mv.col >>= 3;
    tmp_mv.as_mv.row >>= 3;

    // Small-range full-pixel motion search
    bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,

                                       &cpi->fn_ptr[block_size],
                                       x->nmvjointcost, x->mvcost,
                                       &ref_mv[id], second_pred,

    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
    x->mv_row_min = tmp_row_min;
    x->mv_row_max = tmp_row_max;
    if (bestsme < INT_MAX) {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;

      bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,

                                             &cpi->fn_ptr[block_size],
                                             x->nmvjointcost, x->mvcost,
                                             &dis, &sse, second_pred,
    }

    if (id)
      xd->plane[0].pre[0] = scaled_first_yv12;

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_int = tmp_mv.as_int;
      last_besterr[id] = bestsme;
    }
  }
  // Restore the predictor.
  if (scaled_ref_frame[0]) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }

  if (scaled_ref_frame[1]) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[1] = backup_second_yv12[i];
  }

  *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
                             &mbmi->ref_mvs[refs[0]][0],
                             x->nmvjointcost, x->mvcost, 96,
                             x->e_mbd.allow_high_precision_mv);
  *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
                              &mbmi->ref_mvs[refs[1]][0],
                              x->nmvjointcost, x->mvcost, 96,
                              x->e_mbd.allow_high_precision_mv);

  vpx_free(second_pred);
}
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE_TYPE bsize,
                                 int64_t txfm_cache[],
                                 int *rate2, int64_t *distortion,
                                 int *skippable,
                                 int *rate_y, int64_t *distortion_y,
                                 int *rate_uv, int64_t *distortion_uv,
                                 int *mode_excluded, int *disable_skip,
                                 INTERPOLATIONFILTERTYPE *best_filter,
                                 int_mv *frame_mv,
                                 int mi_row, int mi_col,
                                 int_mv single_newmv[MAX_REF_FRAMES]) {
  const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);

  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
  const enum BlockSize uv_block_size = get_plane_block_size(bsize,
                                                            &xd->plane[1]);
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  const int is_comp_pred = (mbmi->ref_frame[1] > 0);
  const int num_refs = is_comp_pred ? 2 : 1;
  const int this_mode = mbmi->mode;
  int i;
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int_mv cur_mv[2];
  int64_t this_rd = 0;
  unsigned char tmp_buf[MAX_MB_PLANE][64 * 64];
  int pred_exists = 0;
  int interpolating_intpel_seen = 0;
  int intpel_mv;
  int64_t rd, best_rd = INT64_MAX;
  switch (this_mode) {

      // Initialize mv using single prediction mode result.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh < bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv,
                            mi_row, mi_col, single_newmv, &rate_mv);
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
                                  &mbmi->ref_mvs[refs[0]][0],
                                  x->nmvjointcost, x->mvcost, 96,
                                  x->e_mbd.allow_high_precision_mv);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
                                   &mbmi->ref_mvs[refs[1]][0],
                                   x->nmvjointcost, x->mvcost, 96,
                                   x->e_mbd.allow_high_precision_mv);
      }

      if (frame_mv[refs[0]].as_int == INVALID_MV ||
          frame_mv[refs[1]].as_int == INVALID_MV)

      single_motion_search(cpi, x, bsize, mi_row, mi_col,

      frame_mv[refs[0]].as_int =
          xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;
  for (i = 0; i < num_refs; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of the image.
    if (this_mode == NEWMV)
      assert(!clamp_mv2(&cur_mv[i], xd));
    else
      clamp_mv2(&cur_mv[i], xd);

    if (mv_check_bounds(x, &cur_mv[i]))
      return INT64_MAX;
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }

  /* We don't include the cost of the second reference here, because there
   * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
   * words if you present them in that order, the second one is always known
   * if the first is known. */
  *rate2 += vp9_cost_mv_ref(cpi, this_mode,
                            mbmi->mb_mode_context[mbmi->ref_frame[0]]);
  interpolating_intpel_seen = 0;
  // Are all MVs integer pel for Y and UV?
  intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
              (mbmi->mv[0].as_mv.col & 15) == 0;
  if (is_comp_pred)
    intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
                 (mbmi->mv[1].as_mv.col & 15) == 0;
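  // (The tests use & 15 rather than & 7: with 2x-subsampled chroma the
  // UV vector is the Y vector halved, so Y must land on an even full-pel
  // offset for UV to be integer-pel as well.)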
  // Search for best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used
  if (cpi->sf.use_8tap_always) {
    *best_filter = EIGHTTAP;
  } else {
    int newbest;
    int tmp_rate_sum = 0;
    int64_t tmp_dist_sum = 0;
    for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
      int rs = 0;
      const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
      const int is_intpel_interp = intpel_mv &&
                                   vp9_is_interpolating_filter[filter];
      mbmi->interp_filter = filter;
      vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);

      if (cm->mcomp_filter_type == SWITCHABLE)
        rs = get_switchable_rate(cm, x);

      if (interpolating_intpel_seen && is_intpel_interp) {
        rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
      } else {
        int rate_sum = 0;
        int64_t dist_sum = 0;
        vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
        rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
        if (!interpolating_intpel_seen && is_intpel_interp) {
          tmp_rate_sum = rate_sum;
          tmp_dist_sum = dist_sum;
        }
      }
      newbest = i == 0 || rd < best_rd;
      if (newbest) {
        best_rd = rd;
        *best_filter = mbmi->interp_filter;
      }

      if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
          (cm->mcomp_filter_type != SWITCHABLE &&
           cm->mcomp_filter_type == mbmi->interp_filter)) {
        int p;

        for (p = 0; p < MAX_MB_PLANE; p++) {
          const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
          const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
          int i;

          for (i = 0; i < y; i++)
            vpx_memcpy(&tmp_buf[p][64 * i],
                       xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x);
        }
        pred_exists = 1;
      }
      interpolating_intpel_seen |= is_intpel_interp;
    }
  }
  // Set the appropriate filter
  mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
                        cm->mcomp_filter_type : *best_filter;
  vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);

  if (pred_exists) {
    int p;

    for (p = 0; p < MAX_MB_PLANE; p++) {
      const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
      const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
      int i;

      for (i = 0; i < y; i++)
        vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
                   &tmp_buf[p][64 * i], x);
    }
  } else {
    // Handles the special case when a filter that is not in the
    // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
  }

  if (cpi->common.mcomp_filter_type == SWITCHABLE)
    *rate2 += get_switchable_rate(cm, x);
  if (cpi->active_map_enabled && x->active_ptr[0] == 0)
    x->skip = 1;
  else if (x->encode_breakout) {
    unsigned int var, sse;
    int threshold = (xd->plane[0].dequant[1]
                     * xd->plane[0].dequant[1] >> 4);

    if (threshold < x->encode_breakout)
      threshold = x->encode_breakout;
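
    // Example (illustrative): dequant[1] = 32 gives a base threshold of
    // 32 * 32 >> 4 = 64; the user-supplied encode_breakout can only
    // raise this floor, never lower it.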
    var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
                                     x->plane[0].src.stride,
                                     xd->plane[0].dst.buf,
                                     xd->plane[0].dst.stride,
                                     &sse);

    if ((int)sse < threshold) {
      unsigned int q2dc = xd->plane[0].dequant[0];
      /* If there is no codeable 2nd order dc
         or a very small uniform pixel change */
      if ((sse - var < q2dc * q2dc >> 4) ||
          (sse / 2 > var && sse - var < 64)) {
        // Check u and v to make sure skip is ok
        unsigned int sse2;
        unsigned int sse2u, sse2v;
        var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
                                            x->plane[1].src.stride,
                                            xd->plane[1].dst.buf,
                                            xd->plane[1].dst.stride, &sse2u);
        var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
                                            x->plane[1].src.stride,
                                            xd->plane[2].dst.buf,
                                            xd->plane[1].dst.stride, &sse2v);
        sse2 = sse2u + sse2v;

        if (sse2 * 2 < threshold) {
          x->skip = 1;
          *distortion = sse + sse2;
          *rate2 = 500;

          /* for best_yrd calculation */
          *rate_uv = 0;
          *distortion_uv = sse2;

          *disable_skip = 1;
          this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
        }
      }
    }
  }

  if (!x->skip) {
    int skippable_y, skippable_uv;

    // Y cost and distortion
    super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
                    bsize, txfm_cache);

    *rate2 += *rate_y;
    *distortion += *distortion_y;

    super_block_uvrd(cm, x, rate_uv, distortion_uv,
                     &skippable_uv, bsize);

    *rate2 += *rate_uv;
    *distortion += *distortion_uv;
    *skippable = skippable_y && skippable_uv;
  }

  if (!(*mode_excluded)) {
    if (is_comp_pred) {
      *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
    } else {
      *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
    }
  }

  return this_rd;  // if 0, this will be re-calculated by caller
}
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                               int *returnrate, int64_t *returndist,
                               BLOCK_SIZE_TYPE bsize,
                               PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  int rate_y = 0, rate_uv = 0;
  int rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int64_t dist_y = 0, dist_uv = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t txfm_cache[NB_TXFM_MODES], err;
  MB_PREDICTION_MODE mode;
  TX_SIZE txfm_size;
  int rate4x4_y, rate4x4_y_tokenonly;
  int64_t dist4x4_y;
  int64_t err4x4 = INT64_MAX;
  int i;

  vpx_memset(&txfm_cache, 0, sizeof(txfm_cache));

  xd->mode_info_context->mbmi.mode = DC_PRED;
  xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
  err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                               &dist_y, &y_skip, bsize, txfm_cache);
  mode = xd->mode_info_context->mbmi.mode;
  txfm_size = xd->mode_info_context->mbmi.txfm_size;
  rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                          &dist_uv, &uv_skip,
                          (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
                          bsize);
  if (bsize < BLOCK_SIZE_SB8X8)
    err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
                                       &rate4x4_y_tokenonly,
                                       &dist4x4_y, err);

  if (y_skip && uv_skip) {
    *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                  vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
    *returndist = dist_y + (dist_uv >> 2);
    memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
    xd->mode_info_context->mbmi.mode = mode;
    xd->mode_info_context->mbmi.txfm_size = txfm_size;
  } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) {
    *returnrate = rate4x4_y + rate_uv +
                  vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
    *returndist = dist4x4_y + (dist_uv >> 2);
    vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
    xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  } else {
    *returnrate = rate_y + rate_uv +
                  vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
    *returndist = dist_y + (dist_uv >> 2);
    for (i = 0; i < NB_TXFM_MODES; i++) {
      ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode];
    }
    xd->mode_info_context->mbmi.txfm_size = txfm_size;
    xd->mode_info_context->mbmi.mode = mode;
  }

  ctx->mic = *xd->mode_info_context;
}
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                  int mi_row, int mi_col,
                                  int *returnrate, int64_t *returnintra,
                                  int64_t *returndistortion,
                                  BLOCK_SIZE_TYPE bsize,
                                  PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
  MB_PREDICTION_MODE this_mode;
  MB_PREDICTION_MODE best_mode = DC_PRED;
  MV_REFERENCE_FRAME ref_frame;
  unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
  int comp_pred, i;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  int_mv single_newmv[MAX_REF_FRAMES];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int idx_list[4] = {0,
                     cpi->lst_fb_idx,
                     cpi->gld_fb_idx,
                     cpi->alt_fb_idx};
  int64_t best_rd = INT64_MAX;
  int64_t best_txfm_rd[NB_TXFM_MODES];
  int64_t best_txfm_diff[NB_TXFM_MODES];
  int64_t best_pred_diff[NB_PREDICTION_TYPES];
  int64_t best_pred_rd[NB_PREDICTION_TYPES];
  MB_MODE_INFO best_mbmode;
  int j;
  int mode_index, best_mode_index = 0;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vp9_prob comp_mode_p;
  int64_t best_overall_rd = INT64_MAX;
  int64_t best_intra_rd = INT64_MAX;
  INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
  INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
  int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
  int64_t dist_uv[TX_SIZE_MAX_SB];
  int skip_uv[TX_SIZE_MAX_SB];
  MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
  struct scale_factors scale_factor[4];
  unsigned int ref_frame_mask = 0;
  unsigned int mode_mask = 0;
  int64_t mode_distortions[MB_MODE_COUNT] = {-1};
  int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
  int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
                                             cpi->common.y_dc_delta_q);
  int_mv seg_mvs[4][MAX_REF_FRAMES];
  union b_mode_info best_bmodes[4];
  PARTITION_INFO best_partition;
  int bwsl = b_width_log2(bsize);
  int bws = (1 << bwsl) / 4;  // mode_info step for subsize
  int bhsl = b_height_log2(bsize);
  int bhs = (1 << bhsl) / 4;  // mode_info step for subsize
  for (i = 0; i < 4; i++) {
    int j;

    for (j = 0; j < MAX_REF_FRAMES; j++)
      seg_mvs[i][j].as_int = INVALID_MV;
  }
  // Everywhere the flag is set the error is much higher than its neighbors.
  ctx->frames_with_high_error = 0;
  ctx->modes_with_high_error = 0;

  xd->mode_info_context->mbmi.segment_id = segment_id;
  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
  vpx_memset(&single_newmv, 0, sizeof(single_newmv));

  for (i = 0; i < NB_PREDICTION_TYPES; ++i)
    best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < NB_TXFM_MODES; i++)
    best_txfm_rd[i] = INT64_MAX;
  // Create a mask set to 1 for each frame used by a smaller resolution.
  if (cpi->sf.use_avoid_tested_higherror) {
    switch (block_size) {
      case BLOCK_64X64:
        for (i = 0; i < 4; i++) {
          for (j = 0; j < 4; j++) {
            ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
            mode_mask |= x->mb_context[i][j].modes_with_high_error;
          }
        }
        for (i = 0; i < 4; i++) {
          ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
          mode_mask |= x->sb32_context[i].modes_with_high_error;
        }
        break;
      case BLOCK_32X32:
        for (i = 0; i < 4; i++) {
          ref_frame_mask |=
              x->mb_context[xd->sb_index][i].frames_with_high_error;
          mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
        }
        break;
      default:
        // Until we handle all block sizes, set it to present.
        ref_frame_mask = 0;
        mode_mask = 0;
    }
    ref_frame_mask = ~ref_frame_mask;
    mode_mask = ~mode_mask;
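
    // After the inversion a set bit means "still worth testing": e.g. if
    // only LAST_FRAME had been flagged as high-error by the smaller
    // partitions, ref_frame_mask = ~(1 << LAST_FRAME) leaves every other
    // reference frame enabled.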
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
                         mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
                         yv12_mb, scale_factor);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  if (!cpi->sf.use_avoid_tested_higherror
      || (cpi->sf.use_avoid_tested_higherror
          && (ref_frame_mask & (1 << INTRA_FRAME)))) {
    mbmi->mode = DC_PRED;
    mbmi->ref_frame[0] = INTRA_FRAME;
    for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
                      (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
                       (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32)));
         i++) {
      mbmi->txfm_size = i;
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i],
                              &dist_uv[i], &skip_uv[i],
                              (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
                              bsize);
      mode_uv[i] = mbmi->uv_mode;
    }
  }
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable;
    int64_t txfm_cache[NB_TXFM_MODES];

    for (i = 0; i < NB_TXFM_MODES; ++i)
      txfm_cache[i] = INT64_MAX;
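
    // The gate below scales each mode's base threshold by a frequency
    // factor in 1/16th units: e.g. a threshold of 2000 with a factor of
    // 16 gives 2000 * 16 >> 4 = 2000. The factor (and so the gate) grows
    // every time another mode wins at this block size, so rarely-useful
    // modes are tried less and less often.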
    // Test best rd so far against threshold for trying this mode.
    if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
                     cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) ||
        cpi->rd_threshes[bsize][mode_index] == INT_MAX)
      continue;

    // Do not allow compound prediction if the segment level reference
    // frame feature is in use as in this case there can only be one reference.
    if ((vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME) &&
        vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME))
      continue;

    this_mode = vp9_mode_order[mode_index].mode;
    ref_frame = vp9_mode_order[mode_index].ref_frame;

    if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) {
      if (!(ref_frame_mask & (1 << ref_frame))) {
        continue;
      }
      if (!(mode_mask & (1 << this_mode))) {
        continue;
      }
      if (vp9_mode_order[mode_index].second_ref_frame != NONE
          && !(ref_frame_mask
               & (1 << vp9_mode_order[mode_index].second_ref_frame))) {
        continue;
      }
    }
    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = vp9_mode_order[mode_index].second_ref_frame;

    if (!(ref_frame == INTRA_FRAME
          || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
      continue;
    }
    if (!(mbmi->ref_frame[1] == NONE
          || (cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))) {
      continue;
    }

    // TODO(jingning, jkoleszar): scaling reference frame not supported for
    // SPLITMV.
    if (mbmi->ref_frame[0] > 0 &&
        (scale_factor[mbmi->ref_frame[0]].x_scale_fp !=
         (1 << VP9_REF_SCALE_SHIFT) ||
         scale_factor[mbmi->ref_frame[0]].y_scale_fp !=
         (1 << VP9_REF_SCALE_SHIFT)) &&
        this_mode == SPLITMV)
      continue;

    if (mbmi->ref_frame[1] > 0 &&
        (scale_factor[mbmi->ref_frame[1]].x_scale_fp !=
         (1 << VP9_REF_SCALE_SHIFT) ||
         scale_factor[mbmi->ref_frame[1]].y_scale_fp !=
         (1 << VP9_REF_SCALE_SHIFT)) &&
        this_mode == SPLITMV)
      continue;

    set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                      scale_factor);
    comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    mbmi->mode = this_mode;
    mbmi->uv_mode = DC_PRED;

    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mbmi->interp_filter = cm->mcomp_filter_type;
    vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

    if (bsize >= BLOCK_SIZE_SB8X8 &&
        (this_mode == I4X4_PRED || this_mode == SPLITMV))
      continue;
    if (bsize < BLOCK_SIZE_SB8X8 &&
        !(this_mode == I4X4_PRED || this_mode == SPLITMV))
      continue;

    if (comp_pred) {
      if (!(cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))
        continue;
      set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                        scale_factor);

      mode_excluded =
          mode_excluded ?
              mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
    } else {
      // mbmi->ref_frame[1] = vp9_mode_order[mode_index].ref_frame[1];
      if (ref_frame != INTRA_FRAME) {
        if (mbmi->ref_frame[1] != INTRA_FRAME)
          mode_excluded =
              mode_excluded ?
                  mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
      }
    }
    // Select predictors
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred)
        xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // If the segment reference frame feature is enabled...
    // then do nothing if the current ref frame is not allowed.
    if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(xd, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      continue;
    // If the segment skip feature is enabled...
    // then do nothing if the current mode is not allowed.
    } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
               (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
      continue;
    // Disable this drop out case if the ref frame
    // segment level feature is enabled for this segment. This is to
    // prevent the possibility that we end up unable to pick any mode.
    } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
      // unless ARNR filtering is enabled in which case we want
      // an unfiltered alternative.
      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
        if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) {
          continue;
        }
      }
    }

    // TODO(JBB): This is to make up for the fact that we don't have sad
    // functions that work when the block size reads outside the umv. We
    // should fix this either by making the motion search just work on
    // a representative block in the boundary (first) and then implement a
    // function that does sads when inside the border.
    if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
        this_mode == NEWMV) {
      continue;
    }
    if (this_mode == I4X4_PRED) {
      int rate;

      mbmi->txfm_size = TX_4X4;
      rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
                                &distortion_y, INT64_MAX);
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      rate2 += rate_uv_intra[TX_4X4];
      rate_uv = rate_uv_intra[TX_4X4];
      distortion2 += dist_uv[TX_4X4];
      distortion_uv = dist_uv[TX_4X4];
      mbmi->uv_mode = mode_uv[TX_4X4];
      txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      for (i = 0; i < NB_TXFM_MODES; ++i)
        txfm_cache[i] = txfm_cache[ONLY_4X4];
    } else if (ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx;
      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
                      bsize, txfm_cache);

      uv_tx = mbmi->txfm_size;
      if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8)
        uv_tx = TX_4X4;
      if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16)
        uv_tx = TX_8X8;
      else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32)
        uv_tx = TX_16X16;

      rate_uv = rate_uv_intra[uv_tx];
      distortion_uv = dist_uv[uv_tx];
      skippable = skippable && skip_uv[uv_tx];
      mbmi->uv_mode = mode_uv[uv_tx];

      rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv;
      if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
        rate2 += intra_cost_penalty;
      distortion2 = distortion_y + distortion_uv;
    } else if (this_mode == SPLITMV) {
      const int is_comp_pred = mbmi->ref_frame[1] > 0;
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT_MAX;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref = is_comp_pred ?
          &mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL;
      union b_mode_info tmp_best_bmodes[16];
      MB_MODE_INFO tmp_best_mbmode;
      PARTITION_INFO tmp_best_partition;
      int pred_exists = 0;
      int uv_skippable;

      this_rd_thresh = (mbmi->ref_frame[0] == LAST_FRAME) ?
          cpi->rd_threshes[bsize][THR_NEWMV] :
          cpi->rd_threshes[bsize][THR_NEWA];
      this_rd_thresh = (mbmi->ref_frame[0] == GOLDEN_FRAME) ?
          cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
      xd->mode_info_context->mbmi.txfm_size = TX_4X4;

      for (switchable_filter_index = 0;
           switchable_filter_index < VP9_SWITCHABLE_FILTERS;
           ++switchable_filter_index) {
        int newbest;
        mbmi->interp_filter =
            vp9_switchable_interp[switchable_filter_index];
        vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
                                             &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                                             second_ref, INT64_MAX,
                                             &rate, &rate_y, &distortion,
                                             &skippable,
                                             (int)this_rd_thresh, seg_mvs,

        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
          const int rs = get_switchable_rate(cm, x);
          tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
        }
        newbest = (tmp_rd < tmp_best_rd);
        if (newbest) {
          tmp_best_filter = mbmi->interp_filter;
          tmp_best_rd = tmp_rd;
        }
        if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
            (mbmi->interp_filter == cm->mcomp_filter_type &&
             cm->mcomp_filter_type != SWITCHABLE)) {
          tmp_best_rdu = tmp_rd;
          tmp_best_rate = rate;
          tmp_best_ratey = rate_y;
          tmp_best_distortion = distortion;
          tmp_best_skippable = skippable;
          tmp_best_mbmode = *mbmi;
          tmp_best_partition = *x->partition_info;
          for (i = 0; i < 4; i++)
            tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
          pred_exists = 1;
        }
      }  // switchable_filter_index loop
      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
                             tmp_best_filter : cm->mcomp_filter_type);
      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

      // Handles the special case when a filter that is not in the
      // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level
      tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
                                           &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                                           second_ref, INT64_MAX,
                                           &rate, &rate_y, &distortion,
                                           &skippable,
                                           (int)this_rd_thresh, seg_mvs,

      if (cpi->common.mcomp_filter_type == SWITCHABLE) {
        int rs = get_switchable_rate(cm, x);
        tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
      }
      tmp_rd = tmp_best_rdu;
      rate = tmp_best_rate;
      rate_y = tmp_best_ratey;
      distortion = tmp_best_distortion;
      skippable = tmp_best_skippable;
      *mbmi = tmp_best_mbmode;
      *x->partition_info = tmp_best_partition;
      for (i = 0; i < 4; i++)
        xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];

      rate2 += rate;
      distortion2 += distortion;

      if (cpi->common.mcomp_filter_type == SWITCHABLE)
        rate2 += get_switchable_rate(cm, x);

      // If even the 'Y' rd value of split is higher than best so far
      // then don't bother looking at UV.
      vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                      BLOCK_SIZE_SB8X8);
      vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
      super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
                                &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4);
      rate2 += rate_uv;
      distortion2 += distortion_uv;
      skippable = skippable && uv_skippable;

      txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      for (i = 0; i < NB_TXFM_MODES; ++i)
        txfm_cache[i] = txfm_cache[ONLY_4X4];

      if (!mode_excluded) {
        if (is_comp_pred) {
          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
        } else {
          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
        }
      }

      compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
    } else {
      compmode_cost = vp9_cost_bit(comp_mode_p,
                                   mbmi->ref_frame[1] > INTRA_FRAME);
      this_rd = handle_inter_mode(cpi, x, bsize,
                                  txfm_cache,
                                  &rate2, &distortion2, &skippable,
                                  &rate_y, &distortion_y,
                                  &rate_uv, &distortion_uv,
                                  &mode_excluded, &disable_skip,
                                  &tmp_best_filter, frame_mv[this_mode],
                                  mi_row, mi_col,
                                  single_newmv);
      if (this_rd == INT64_MAX)
        continue;
    }

    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
      rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (mbmi->ref_frame[1] > INTRA_FRAME) {
      rate2 += ref_costs_comp[mbmi->ref_frame[0]];
    } else {
      rate2 += ref_costs_single[mbmi->ref_frame[0]];
    }
    if (!disable_skip) {
      // Test for the condition where skip block will be activated
      // because there are no non zero coefficients and make any
      // necessary adjustment for rate. Ignore if skip is coded at
      // segment level as the cost won't have been added in.
      int mb_skip_allowed;

      // Is Mb level skip allowed (i.e. not coded at segment level).
      mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);

      if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
        // Back out the coefficient coding costs
        rate2 -= (rate_y + rate_uv);
        // for best_yrd calculation
        rate_uv = 0;

        if (mb_skip_allowed) {
          int prob_skip_cost;

          // Cost the skip mb case
          vp9_prob skip_prob =
              vp9_get_pred_prob(cm, xd, PRED_MBSKIP);

          prob_skip_cost = vp9_cost_bit(skip_prob, 1);
          rate2 += prob_skip_cost;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no skip flag.
        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
                                                            PRED_MBSKIP), 0);
        rate2 += prob_skip_cost;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }
    // Keep record of best intra distortion
    if ((xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) &&
        (this_rd < best_intra_rd)) {
      best_intra_rd = this_rd;
      *returnintra = distortion2;
    }

    if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME)
      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);

    if (this_rd < best_overall_rd) {
      best_overall_rd = this_rd;
      best_filter = tmp_best_filter;
      best_mode = this_mode;
    }

    if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
      // Store the respective mode distortions for later use.
      if (mode_distortions[this_mode] == -1
          || distortion2 < mode_distortions[this_mode]) {
        mode_distortions[this_mode] = distortion2;
      }
      if (frame_distortions[mbmi->ref_frame[0]] == -1
          || distortion2 < frame_distortions[mbmi->ref_frame[0]]) {
        frame_distortions[mbmi->ref_frame[0]] = distortion2;
      }
    }
    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        // Note index of best mode so far
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_mbmode = *mbmi;
        best_partition = *x->partition_info;

        if (this_mode == I4X4_PRED || this_mode == SPLITMV)
          for (i = 0; i < 4; i++)
            best_bmodes[i] = xd->mode_info_context->bmi[i];
      }

      // Testing this mode gave rise to an improvement in best error score.
      // Lower threshold a bit for next time
      cpi->rd_thresh_mult[mode_index] =
          (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
              cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
      cpi->rd_threshes[mode_index] =
          (cpi->rd_baseline_thresh[mode_index] >> 7)
              * cpi->rd_thresh_mult[mode_index];
    } else {
      // If the mode did not help improve the best error case then
      // raise the threshold for testing that mode next time around.
      cpi->rd_thresh_mult[mode_index] += 4;

      if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
        cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

      cpi->rd_threshes[mode_index] =
          (cpi->rd_baseline_thresh[mode_index] >> 7)
              * cpi->rd_thresh_mult[mode_index];
    }
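
    // Net effect of the two branches above: a winning mode walks its
    // rd_thresh_mult down by 2 toward MIN_THRESHMULT, a losing one walks
    // it up by 4 toward MAX_THRESHMULT, and rd_threshes is re-derived
    // from (baseline >> 7) * mult either way.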
    /* keep record of best compound/single-only prediction */
    if (!disable_skip && mbmi->ref_frame[0] != INTRA_FRAME) {
      int single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (mbmi->ref_frame[1] <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
      } else if (mbmi->ref_frame[1] > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
    }

    /* keep record of best txfm size */
    if (bsize < BLOCK_SIZE_SB32X32) {
      if (bsize < BLOCK_SIZE_MB16X16) {
        if (this_mode == SPLITMV || this_mode == I4X4_PRED)
          txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
        txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
      }
      txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < NB_TXFM_MODES; i++) {
        int64_t adj_rd = INT64_MAX;
        if (this_mode != I4X4_PRED) {
          adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
        } else {
          adj_rd = this_rd;
        }

        if (adj_rd < best_txfm_rd[i])
          best_txfm_rd[i] = adj_rd;
      }
    }
    if (x->skip && !mode_excluded)
      break;
  }

  // Flag all modes that have a distortion that's > 2x the best we found at
  // this level.
  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
      continue;

    if (mode_distortions[mode_index] > 2 * *returndistortion) {
      ctx->modes_with_high_error |= (1 << mode_index);
    }
  }

  // Flag all ref frames that have a distortion that's > 2x the best we found
  // at this level.
  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
      ctx->frames_with_high_error |= (1 << ref_frame);
    }
  }
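
  // These flags are read back by the ref_frame_mask / mode_mask logic
  // near the top of this function when the enclosing, larger block size
  // is searched (cpi->sf.use_avoid_tested_higherror).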
  if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
    *returnrate = INT_MAX;
    *returndistortion = INT_MAX;
    return best_rd;
  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.ref_frame[0] == INTRA_FRAME));

  // Accumulate filter usage stats
  // TODO(agrange): Use RD criteria to select interpolation filter mode.
  if (is_inter_mode(best_mode))
    ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];

  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the mode/size
  // combination that wins out.
  if (cpi->sf.adpative_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
      if (mode_index == best_mode_index) {
        cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT;
      } else {
        cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC;
        if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
            (cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) {
          cpi->rd_thresh_freq_fact[bsize][mode_index] =
              cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT;
        }
      }
    }
  }
  // TODO(rbultje) integrate with the rd_thresh_freq_fact thresholding above.
  // Reduce the activation RD thresholds for the best choice mode
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);

    cpi->rd_thresh_mult[best_mode_index] =
        (cpi->rd_thresh_mult[best_mode_index] >=
         (MIN_THRESHMULT + best_adjustment)) ?
            cpi->rd_thresh_mult[best_mode_index] - best_adjustment :
            MIN_THRESHMULT;
    cpi->rd_threshes[best_mode_index] =
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
        cpi->rd_thresh_mult[best_mode_index];
  }

  // This code forces Altref, 0, 0 and skip for the frame that overlays an
  // altref unless Altref is filtered. However, this is unsafe if
  // segment level coding of ref frame is enabled for this segment.
  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
      cpi->is_src_frame_alt_ref &&
      (cpi->oxcf.arnr_max_frames == 0) &&
      (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame[0] != ALTREF_FRAME)
      && bsize >= BLOCK_SIZE_SB8X8) {
    mbmi->mode = ZEROMV;
    mbmi->ref_frame[0] = ALTREF_FRAME;
    mbmi->ref_frame[1] = NONE;
    mbmi->mv[0].as_int = 0;
    mbmi->uv_mode = DC_PRED;
    mbmi->mb_skip_coeff = 1;
    if (cm->txfm_mode == TX_MODE_SELECT) {
      if (bsize >= BLOCK_SIZE_SB32X32)
        mbmi->txfm_size = TX_32X32;
      else if (bsize >= BLOCK_SIZE_MB16X16)
        mbmi->txfm_size = TX_16X16;
      else
        mbmi->txfm_size = TX_8X8;
    }

    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
    vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
    goto end;
  }
  *mbmi = best_mbmode;
  if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
    for (i = 0; i < 4; i++)
      xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
  }

  if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
    for (i = 0; i < 4; i++)
      xd->mode_info_context->bmi[i].as_mv[0].as_int =
          best_bmodes[i].as_mv[0].as_int;

    if (mbmi->ref_frame[1] > 0)
      for (i = 0; i < 4; i++)
        xd->mode_info_context->bmi[i].as_mv[1].as_int =
            best_bmodes[i].as_mv[1].as_int;

    *x->partition_info = best_partition;

    mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
    mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
  }

  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < NB_TXFM_MODES; i++) {
      if (best_txfm_rd[i] == INT64_MAX)
        best_txfm_diff[i] = 0;
      else
        best_txfm_diff[i] = best_rd - best_txfm_rd[i];
    }
  } else {
    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
  }

 end:
  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                    scale_factor);
  store_coding_context(x, ctx, best_mode_index,
                       &best_partition,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_txfm_diff);

  return best_rd;
}