2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
17 #include "vp9/common/vp9_pragmas.h"
18 #include "vp9/encoder/vp9_tokenize.h"
19 #include "vp9/encoder/vp9_treewriter.h"
20 #include "vp9/encoder/vp9_onyx_int.h"
21 #include "vp9/encoder/vp9_modecosts.h"
22 #include "vp9/encoder/vp9_encodeintra.h"
23 #include "vp9/common/vp9_entropymode.h"
24 #include "vp9/common/vp9_reconinter.h"
25 #include "vp9/common/vp9_reconintra.h"
26 #include "vp9/common/vp9_findnearmv.h"
27 #include "vp9/common/vp9_quant_common.h"
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_quantize.h"
30 #include "vp9/encoder/vp9_variance.h"
31 #include "vp9/encoder/vp9_mcomp.h"
32 #include "vp9/encoder/vp9_rdopt.h"
33 #include "vp9/encoder/vp9_ratectrl.h"
34 #include "vpx_mem/vpx_mem.h"
35 #include "vp9/common/vp9_systemdependent.h"
36 #include "vp9/encoder/vp9_encodemv.h"
37 #include "vp9/common/vp9_seg_common.h"
38 #include "vp9/common/vp9_pred_common.h"
39 #include "vp9/common/vp9_entropy.h"
41 #include "vp9/common/vp9_mvref_common.h"
42 #include "vp9/common/vp9_common.h"
44 #define INVALID_MV 0x80008000
46 /* Factor to weigh the rate for switchable interp filters */
47 #define SWITCHABLE_INTERP_RATE_FACTOR 1
49 DECLARE_ALIGNED(16, extern const uint8_t,
50 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
52 #define I4X4_PRED 0x8000
53 #define SPLITMV 0x10000
// Fixed search order of {prediction mode, first reference frame, second
// reference frame} triples tried by the RD mode-decision loop.  Single
// reference modes come first, then the intra modes, then NEWMV/SPLITMV,
// and finally the compound (two-reference) prediction modes.
// NOTE(review): this listing is garbled -- the original file's own line
// numbers are fused onto the start of every line and the closing "};" of
// the table is missing from this view.
55 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56 {ZEROMV, LAST_FRAME, NONE},
57 {DC_PRED, INTRA_FRAME, NONE},
59 {NEARESTMV, LAST_FRAME, NONE},
60 {NEARMV, LAST_FRAME, NONE},
62 {ZEROMV, GOLDEN_FRAME, NONE},
63 {NEARESTMV, GOLDEN_FRAME, NONE},
65 {ZEROMV, ALTREF_FRAME, NONE},
66 {NEARESTMV, ALTREF_FRAME, NONE},
68 {NEARMV, GOLDEN_FRAME, NONE},
69 {NEARMV, ALTREF_FRAME, NONE},
71 {V_PRED, INTRA_FRAME, NONE},
72 {H_PRED, INTRA_FRAME, NONE},
73 {D45_PRED, INTRA_FRAME, NONE},
74 {D135_PRED, INTRA_FRAME, NONE},
75 {D117_PRED, INTRA_FRAME, NONE},
76 {D153_PRED, INTRA_FRAME, NONE},
77 {D27_PRED, INTRA_FRAME, NONE},
78 {D63_PRED, INTRA_FRAME, NONE},
80 {TM_PRED, INTRA_FRAME, NONE},
82 {NEWMV, LAST_FRAME, NONE},
83 {NEWMV, GOLDEN_FRAME, NONE},
84 {NEWMV, ALTREF_FRAME, NONE},
86 {SPLITMV, LAST_FRAME, NONE},
87 {SPLITMV, GOLDEN_FRAME, NONE},
88 {SPLITMV, ALTREF_FRAME, NONE},
90 {I4X4_PRED, INTRA_FRAME, NONE},
92 /* compound prediction modes */
93 {ZEROMV, LAST_FRAME, GOLDEN_FRAME},
94 {NEARESTMV, LAST_FRAME, GOLDEN_FRAME},
95 {NEARMV, LAST_FRAME, GOLDEN_FRAME},
97 {ZEROMV, ALTREF_FRAME, LAST_FRAME},
98 {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
99 {NEARMV, ALTREF_FRAME, LAST_FRAME},
101 {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
102 {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
103 {NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
105 {NEWMV, LAST_FRAME, GOLDEN_FRAME},
106 {NEWMV, ALTREF_FRAME, LAST_FRAME },
107 {NEWMV, GOLDEN_FRAME, ALTREF_FRAME},
109 {SPLITMV, LAST_FRAME, GOLDEN_FRAME},
110 {SPLITMV, ALTREF_FRAME, LAST_FRAME },
111 {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
// Precomputes per-token coefficient bit costs from the frame's model
// probabilities.  For every (tx size, block type, ref type, band, context)
// cell it expands the model probs to a full probability set and fills two
// tables: cnoskip (EOB node included) and c (cost when the EOB node can be
// skipped).
// NOTE(review): garbled extraction -- loop-variable declarations, the
// vp9_coef_tree argument, the #else/#endif of the CONFIG_BALANCED_COEFTREE
// conditional and the closing braces are missing from this view.
114 static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
115 vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
116 vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
119 for (t = TX_4X4; t <= TX_32X32; t++)
120 for (i = 0; i < BLOCK_TYPES; i++)
121 for (j = 0; j < REF_TYPES; j++)
122 for (k = 0; k < COEF_BANDS; k++)
123 for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
124 vp9_prob probs[ENTROPY_NODES];
125 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
126 vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs,
128 #if CONFIG_BALANCED_COEFTREE
129 // Replace the eob node prob with a very small value so that the
130 // cost approximately equals the cost without the eob node
132 vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree);
134 vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs,
136 assert(c[t][i][j][k][l][DCT_EOB_TOKEN] ==
137 cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]);
// RDMULT adjustment weights indexed by the two-pass intra/inter ratio
// (next_iiratio); only ratios 0..4 get a non-zero boost.
142 static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
143 0, 0, 0, 0, 0, 0, 0, 0,
144 0, 0, 0, 0, 0, 0, 0, 0,
145 0, 0, 0, 0, 0, 0, 0, 0, };
147 // 3* dc_qlookup[Q]*dc_qlookup[Q];
149 /* values are now correlated to quantizer */
// Per-qindex SAD-per-bit lookup tables, filled in by vp9_init_me_luts().
150 static int sad_per_bit16lut[QINDEX_RANGE];
151 static int sad_per_bit4lut[QINDEX_RANGE];
// Fills the sad_per_bit lookup tables with a linear function of the real
// quantizer value for each qindex.
// NOTE(review): garbled extraction -- the loop-variable declaration and the
// closing braces are missing from this view.
153 void vp9_init_me_luts() {
156 // Initialize the sad lut tables using a formulaic calculation for now
157 // This is to make it easier to resolve the impact of experimental changes
158 // to the quantizer tables.
159 for (i = 0; i < QINDEX_RANGE; i++) {
160 sad_per_bit16lut[i] =
161 (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
162 sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
// Derives the rate-distortion multiplier from the DC quantizer value for
// the given quantizer index: RDMULT = (11 * q^2) / 4.
// (Restores the closing brace and strips the fused line numbers that the
// garbled extraction left in this block.)
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}
171 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
172 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
173 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
// Per-frame RD initialization: computes RDMULT/errorperbit, scales the
// per-mode RD thresholds by a power of the DC quantizer, refreshes the
// token/partition/mode cost tables, and (for inter frames) the MV cost
// tables.
// NOTE(review): garbled extraction -- local declarations (q, i), several
// else-branches and closing braces, and some call arguments are missing
// from this view, so control flow below is incomplete.
177 void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
180 vp9_clear_system_state(); // __asm emms;
182 // Further tests required to see if optimum is different
183 // for key frames, golden frames and arf frames.
184 // if (cpi->common.refresh_golden_frame ||
185 // cpi->common.refresh_alt_ref_frame)
186 qindex = clamp(qindex, 0, MAXQ);
188 cpi->RDMULT = compute_rd_mult(qindex);
189 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
190 if (cpi->twopass.next_iiratio > 31)
191 cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
194 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
// errorperbit must be at least 1 to avoid divide-by-zero style issues.
196 cpi->mb.errorperbit = cpi->RDMULT >> 6;
197 cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
199 vp9_set_speed_features(cpi);
201 q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
// Two scaling paths: the multiply-then-divide form below is used when
// RDMULT is large; the overflow-guarded multiply form follows it.
206 if (cpi->RDMULT > 1000) {
210 for (i = 0; i < MAX_MODES; i++) {
211 if (cpi->sf.thresh_mult[i] < INT_MAX) {
212 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
214 cpi->rd_threshes[i] = INT_MAX;
216 cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
221 for (i = 0; i < MAX_MODES; i++) {
222 if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
223 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
225 cpi->rd_threshes[i] = INT_MAX;
227 cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
231 fill_token_costs(cpi->mb.token_costs,
232 cpi->mb.token_costs_noskip,
233 cpi->common.fc.coef_probs);
235 for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
236 vp9_cost_tokens(cpi->mb.partition_cost[i],
237 cpi->common.fc.partition_prob[i],
240 /*rough estimate for costing*/
241 vp9_init_mode_costs(cpi);
243 if (cpi->common.frame_type != KEY_FRAME) {
244 vp9_build_nmv_cost_table(
245 cpi->mb.nmvjointcost,
246 cpi->mb.e_mbd.allow_high_precision_mv ?
247 cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
248 &cpi->common.fc.nmvc,
249 cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
// Sum of squared differences between the original and dequantized
// coefficients of a block.
//
// coeff      - original transform coefficients
// dqcoeff    - dequantized coefficients after quantization round-trip
// block_size - number of coefficients to compare
// Returns the accumulated squared error.
// (Restores the accumulator declaration, return statement and closing
// brace dropped by the garbled extraction.)
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int i, error = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error;
}
// Computes the rate (in bits, scaled) of coding the quantized coefficients
// of one transform block: walks the scan order, looks up per-token costs
// using the coefficient-band and neighbor context, adds the EOB token cost
// when the block is not forced to skip, and updates the above/left entropy
// contexts.
// NOTE(review): garbled extraction -- the parameter list tail (a/l context
// pointers, tx_size), several local declarations (cost, c, pad, A, L),
// seg_eob initialization, switch/case labels for the per-tx-size setup and
// the final return are missing from this view.
264 static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
265 int plane, int block, PLANE_TYPE type,
270 MACROBLOCKD *const xd = &mb->e_mbd;
271 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
275 const int *scan, *nb;
276 const int eob = xd->plane[plane].eobs[block];
277 const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
279 const int ref = mbmi->ref_frame != INTRA_FRAME;
280 unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
281 mb->token_costs[tx_size][type][ref];
282 ENTROPY_CONTEXT above_ec, left_ec;
283 TX_TYPE tx_type = DCT_DCT;
285 const int segment_id = xd->mode_info_context->mbmi.segment_id;
286 unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
287 mb->token_costs_noskip[tx_size][type][ref];
289 int seg_eob, default_eob;
290 uint8_t token_cache[1024];
291 const uint8_t * band_translate;
293 // Check for consistency of tx_size with mode info
294 assert((!type && !plane) || (type && plane));
295 if (type == PLANE_TYPE_Y_WITH_DC) {
296 assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
298 TX_SIZE tx_size_uv = get_uv_tx_size(xd);
299 assert(tx_size == tx_size_uv);
// Per-tx-size setup: scan order, band translation table and the combined
// above/left entropy contexts (case labels missing from this view).
304 tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
305 get_tx_type_4x4(xd, block) : DCT_DCT;
306 above_ec = A[0] != 0;
309 scan = get_scan_4x4(tx_type);
310 band_translate = vp9_coefband_trans_4x4;
314 const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
315 const int sz = 1 + b_width_log2(sb_type);
316 const int x = block & ((1 << sz) - 1), y = block - x;
317 TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
318 get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
319 above_ec = (A[0] + A[1]) != 0;
320 left_ec = (L[0] + L[1]) != 0;
321 scan = get_scan_8x8(tx_type);
323 band_translate = vp9_coefband_trans_8x8plus;
327 const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
328 const int sz = 2 + b_width_log2(sb_type);
329 const int x = block & ((1 << sz) - 1), y = block - x;
330 TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
331 get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
332 scan = get_scan_16x16(tx_type);
334 above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
335 left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
336 band_translate = vp9_coefband_trans_8x8plus;
340 scan = vp9_default_scan_32x32;
342 above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
343 left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
344 band_translate = vp9_coefband_trans_8x8plus;
350 assert(eob <= seg_eob);
352 pt = combine_entropy_contexts(above_ec, left_ec);
353 nb = vp9_get_coef_neighbors_handle(scan, &pad);
354 default_eob = seg_eob;
356 if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
359 /* sanity check to ensure that we do not have spurious non-zero q values */
361 assert(qcoeff_ptr[scan[eob]] == 0);
// Main scan loop: cost each coded coefficient token; the first coefficient
// (or one following a non-zero) uses the noskip table.
364 for (c = 0; c < eob; c++) {
365 int v = qcoeff_ptr[scan[c]];
366 int t = vp9_dct_value_tokens_ptr[v].token;
367 int band = get_coef_band(band_translate, c);
369 pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
371 if (!c || token_cache[scan[c - 1]]) // do not skip eob
372 cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
374 cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
375 token_cache[scan[c]] = vp9_pt_energy_class[t];
// Add the trailing EOB token cost when the block ends before seg_eob.
379 pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
380 cost += mb->token_costs_noskip[tx_size][type][ref]
381 [get_coef_band(band_translate, c)]
386 // is eob first coefficient;
387 for (pt = 0; pt < (1 << tx_size); pt++) {
388 A[pt] = L[pt] = c > 0;
// Picks the transform size with the best RD cost given per-tx-size rate (r),
// distortion (d) and skip (s) arrays, honoring the frame's txfm_mode, then
// fills txfm_cache with the best RD value achievable under each mode.
// r[n][0] is the rate without tx-size signaling, r[n][1] with it.
// NOTE(review): garbled extraction -- local declarations (rd array index
// vars, s0/s1, n, m), some loop bodies/else-branches and closing braces are
// missing from this view.
394 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
395 int (*r)[2], int *rate,
396 int *d, int *distortion,
398 int64_t txfm_cache[NB_TXFM_MODES],
399 TX_SIZE max_txfm_size) {
400 VP9_COMMON *const cm = &cpi->common;
401 MACROBLOCKD *const xd = &x->e_mbd;
402 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
403 vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
404 int64_t rd[TX_SIZE_MAX_SB][2];
// Fold the tx-size signaling cost into r[n][1].
408 for (n = TX_4X4; n <= max_txfm_size; n++) {
410 for (m = 0; m <= n - (n == max_txfm_size); m++) {
412 r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
414 r[n][1] += vp9_cost_one(cm->prob_tx[m]);
418 assert(skip_prob > 0);
419 s0 = vp9_cost_bit(skip_prob, 0);
420 s1 = vp9_cost_bit(skip_prob, 1);
// RD cost per tx size, with and without signaling; skipped blocks cost s1.
422 for (n = TX_4X4; n <= max_txfm_size; n++) {
424 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
426 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
427 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
// Select the transform size allowed by txfm_mode with the lowest RD cost.
431 if (max_txfm_size == TX_32X32 &&
432 (cm->txfm_mode == ALLOW_32X32 ||
433 (cm->txfm_mode == TX_MODE_SELECT &&
434 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
435 rd[TX_32X32][1] < rd[TX_4X4][1]))) {
436 mbmi->txfm_size = TX_32X32;
437 } else if (max_txfm_size >= TX_16X16 &&
438 (cm->txfm_mode == ALLOW_16X16 ||
439 cm->txfm_mode == ALLOW_32X32 ||
440 (cm->txfm_mode == TX_MODE_SELECT &&
441 rd[TX_16X16][1] < rd[TX_8X8][1] &&
442 rd[TX_16X16][1] < rd[TX_4X4][1]))) {
443 mbmi->txfm_size = TX_16X16;
444 } else if (cm->txfm_mode == ALLOW_8X8 ||
445 cm->txfm_mode == ALLOW_16X16 ||
446 cm->txfm_mode == ALLOW_32X32 ||
447 (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
448 mbmi->txfm_size = TX_8X8;
450 mbmi->txfm_size = TX_4X4;
453 *distortion = d[mbmi->txfm_size];
454 *rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
455 *skip = s[mbmi->txfm_size];
// Record the best achievable RD under every txfm_mode for later reuse.
457 txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
458 txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
459 txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
460 txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
461 if (max_txfm_size == TX_32X32 &&
462 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
463 rd[TX_32X32][1] < rd[TX_4X4][1])
464 txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
465 else if (max_txfm_size >= TX_16X16 &&
466 rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
467 txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
469 txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
470 rd[TX_4X4][1] : rd[TX_8X8][1];
// Sum of squared coefficient differences, accumulated in 64 bits,
// right-shifted by `shift`, and saturated to INT_MAX.  The (unsigned) cast
// on the first factor avoids signed-overflow UB when squaring large diffs.
//
// coeff/dqcoeff - coefficient arrays to compare
// block_size    - number of coefficients
// shift         - post-accumulation downscale (e.g. 2 for 4x4..16x16)
// (Restores the local declarations, the `error >>= shift;` step, the return
// and closing brace dropped by the garbled extraction.)
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += (unsigned)this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}
487 static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
488 const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
489 return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
490 16 << (bwl + bhl), shift);
493 static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
494 const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
498 for (plane = 1; plane < MAX_MB_PLANE; plane++) {
499 const int subsampling = x->e_mbd.plane[plane].subsampling_x +
500 x->e_mbd.plane[plane].subsampling_y;
501 sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
502 16 << (bwl + bhl - subsampling), 0);
505 return sum > INT_MAX ? INT_MAX : (int)sum;
// Carrier struct threaded through foreach_transformed_block_in_plane() so
// rdcost_block() can accumulate token cost with per-block entropy contexts.
// NOTE(review): garbled extraction -- several fields (cm, x, tx_size,
// bw/bh, cost) and the closing brace are missing from this view; the
// initializer at the rdcost_plane call site implies their order.
508 struct rdcost_block_args {
511 ENTROPY_CONTEXT t_above[16];
512 ENTROPY_CONTEXT t_left[16];
// foreach_transformed_block_in_plane() callback: converts the block index
// to raster x/y, then adds this transform block's coefficient cost (via
// cost_coeffs with the locally tracked entropy contexts) to args->cost.
// NOTE(review): garbled extraction -- the x_idx/y_idx declarations and the
// closing brace are missing from this view.
519 static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
520 int ss_txfrm_size, void *arg) {
521 struct rdcost_block_args* args = arg;
523 MACROBLOCKD * const xd = &args->x->e_mbd;
525 txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx,
528 args->cost += cost_coeffs(args->cm, args->x, plane, block,
529 xd->plane[plane].plane_type, args->t_above + x_idx,
530 args->t_left + y_idx, args->tx_size,
531 args->bw * args->bh);
// Total coefficient rate for one plane of a block: seeds local entropy
// contexts from the decoder-visible above/left contexts, then walks every
// transform block in the plane with rdcost_block().
// NOTE(review): garbled extraction -- the `return args.cost;` tail and
// closing brace are missing from this view.
534 static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
535 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
536 MACROBLOCKD * const xd = &x->e_mbd;
537 const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
538 const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
539 const int bw = 1 << bwl, bh = 1 << bhl;
540 struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 };
542 vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
543 sizeof(ENTROPY_CONTEXT) * bw);
544 vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
545 sizeof(ENTROPY_CONTEXT) * bh);
547 foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args);
// Sums rdcost_plane() over the chroma planes (1..MAX_MB_PLANE-1).
// NOTE(review): garbled extraction -- the cost/plane declarations, the
// return statement and closing braces are missing from this view.
552 static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
553 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
556 for (plane = 1; plane < MAX_MB_PLANE; plane++) {
557 cost += rdcost_plane(cm, x, plane, bsize, tx_size);
// Rate/distortion/skippable for the luma plane at one fixed transform
// size: encodes (intra) or transform-quantizes (inter), then measures
// block error and token rate.  Distortion is downscaled by 2 except for
// 32x32 transforms.
// NOTE(review): garbled extraction -- the `else` introducing the inter
// path (vp9_xform_quant_sby) and the closing brace are missing from this
// view.
562 static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
563 int *rate, int *distortion, int *skippable,
564 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
565 MACROBLOCKD *const xd = &x->e_mbd;
566 xd->mode_info_context->mbmi.txfm_size = tx_size;
568 if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
569 vp9_encode_intra_block_y(cm, x, bsize);
571 vp9_xform_quant_sby(cm, x, bsize);
573 *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
574 *rate = rdcost_plane(cm, x, 0, bsize, tx_size);
575 *skippable = vp9_sby_is_skippable(xd, bsize);
// Luma RD search over transform sizes: subtracts the prediction (inter),
// then either fixes the largest allowed tx size (speed > 4) or evaluates
// each candidate size and lets choose_txfm_size_from_rd() pick the winner
// and fill txfm_cache.
// NOTE(review): garbled extraction -- the fixed-size path's tx_size
// argument, the per-size call tx_size arguments and several closing braces
// are missing from this view.
578 static void super_block_yrd(VP9_COMP *cpi,
579 MACROBLOCK *x, int *rate, int *distortion,
580 int *skip, BLOCK_SIZE_TYPE bs,
581 int64_t txfm_cache[NB_TXFM_MODES]) {
582 VP9_COMMON *const cm = &cpi->common;
583 int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
584 MACROBLOCKD *xd = &x->e_mbd;
585 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
587 if (mbmi->ref_frame > INTRA_FRAME)
588 vp9_subtract_sby(x, bs);
// Fast path: pick the largest tx size the block size permits.
590 if (cpi->speed > 4) {
591 if (bs >= BLOCK_SIZE_SB32X32) {
592 mbmi->txfm_size = TX_32X32;
593 } else if (bs >= BLOCK_SIZE_MB16X16) {
594 mbmi->txfm_size = TX_16X16;
595 } else if (bs >= BLOCK_SIZE_SB8X8) {
596 mbmi->txfm_size = TX_8X8;
598 mbmi->txfm_size = TX_4X4;
600 vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
601 super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
// Full search: evaluate every tx size the block size allows.
605 if (bs >= BLOCK_SIZE_SB32X32)
606 super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
608 if (bs >= BLOCK_SIZE_MB16X16)
609 super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
611 super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
613 super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
616 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
617 TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
618 - (bs < BLOCK_SIZE_MB16X16));
// RD search over the 4x4 intra prediction modes (DC..TM) for one sub-8x8
// block group: for each mode it predicts, transforms, quantizes, costs the
// coefficients, tracks the best mode's dqcoeffs and contexts, and finally
// replays the best mode's prediction/reconstruction into the frame buffer.
// Returns the best RD cost found.
// NOTE(review): garbled extraction -- several locals (rate, ratey,
// distortion, this_rd, block, idx/idy, src, dst, bmode_costs parameter),
// some else-branches, break/assignment lines and closing braces are
// missing from this view.
621 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
622 MB_PREDICTION_MODE *best_mode,
624 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
625 int *bestrate, int *bestratey,
627 BLOCK_SIZE_TYPE bsize) {
628 MB_PREDICTION_MODE mode;
629 MACROBLOCKD *xd = &x->e_mbd;
630 int64_t best_rd = INT64_MAX;
633 VP9_COMMON *const cm = &cpi->common;
634 const int src_stride = x->plane[0].src.stride;
636 int16_t *src_diff, *coeff;
638 ENTROPY_CONTEXT ta[2], tempa[2];
639 ENTROPY_CONTEXT tl[2], templ[2];
640 TX_TYPE tx_type = DCT_DCT;
641 TX_TYPE best_tx_type = DCT_DCT;
642 int bw = 1 << b_width_log2(bsize);
643 int bh = 1 << b_height_log2(bsize);
645 DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
649 vpx_memcpy(ta, a, sizeof(ta));
650 vpx_memcpy(tl, l, sizeof(tl));
651 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
// Try every 4x4 intra mode and keep the RD best.
653 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
657 rate = bmode_costs[mode];
660 vpx_memcpy(tempa, ta, sizeof(ta));
661 vpx_memcpy(templ, tl, sizeof(tl));
663 for (idy = 0; idy < bh; ++idy) {
664 for (idx = 0; idx < bw; ++idx) {
665 block = ib + idy * 2 + idx;
666 xd->mode_info_context->bmi[block].as_mode.first = mode;
667 src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
668 x->plane[0].src.buf, src_stride);
669 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
670 x->plane[0].src_diff);
671 coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
672 dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
673 xd->plane[0].dst.buf,
674 xd->plane[0].dst.stride);
675 vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
676 dst, xd->plane[0].dst.stride);
677 vp9_subtract_block(4, 4, src_diff, 8,
679 dst, xd->plane[0].dst.stride);
// ADST/DCT hybrid transform when the mode maps to a non-DCT tx type.
681 tx_type = get_tx_type_4x4(xd, block);
682 if (tx_type != DCT_DCT) {
683 vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
684 x->quantize_b_4x4(x, block, tx_type, 16);
686 x->fwd_txm4x4(src_diff, coeff, 16);
687 x->quantize_b_4x4(x, block, tx_type, 16);
690 ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
691 tempa + idx, templ + idy, TX_4X4, 16);
692 distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
693 block, 16), 16) >> 2;
695 if (best_tx_type != DCT_DCT)
696 vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
697 dst, xd->plane[0].dst.stride, best_tx_type);
699 xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
700 dst, xd->plane[0].dst.stride);
705 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
// New best: record rates/distortion/mode and snapshot contexts + dqcoeffs.
707 if (this_rd < best_rd) {
710 *bestdistortion = distortion;
713 best_tx_type = tx_type;
714 vpx_memcpy(a, tempa, sizeof(tempa));
715 vpx_memcpy(l, templ, sizeof(templ));
716 for (idy = 0; idy < bh; ++idy) {
717 for (idx = 0; idx < bw; ++idx) {
718 block = ib + idy * 2 + idx;
719 vpx_memcpy(best_dqcoeff[idy * 2 + idx],
720 BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
721 sizeof(best_dqcoeff[0]));
// Replay the winning mode: re-predict and reconstruct from saved dqcoeffs.
727 for (idy = 0; idy < bh; ++idy) {
728 for (idx = 0; idx < bw; ++idx) {
729 block = ib + idy * 2 + idx;
730 xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
731 dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
732 xd->plane[0].dst.buf,
733 xd->plane[0].dst.stride);
735 vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
736 dst, xd->plane[0].dst.stride);
738 if (best_tx_type != DCT_DCT)
739 vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
740 xd->plane[0].dst.stride, best_tx_type);
742 xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
743 xd->plane[0].dst.stride)
// Runs the 4x4 intra mode search over every sub-8x8 unit of the block,
// using context-dependent mode costs on key frames, propagating the chosen
// mode to covered bmi entries, and early-exiting once the accumulated RD
// exceeds best_rd.  Returns the total RDCOST.
// NOTE(review): garbled extraction -- several locals (i, j, idx/idy, cost,
// distortion, tot_rate_y, bmode_costs), the rd_pick_intra4x4block argument
// tail, accumulation lines, break statements and closing braces are
// missing from this view.
750 static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
751 int *Rate, int *rate_y,
752 int *Distortion, int64_t best_rd) {
754 MACROBLOCKD *const xd = &mb->e_mbd;
755 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
756 int bw = 1 << b_width_log2(bsize);
757 int bh = 1 << b_height_log2(bsize);
762 int64_t total_rd = 0;
763 ENTROPY_CONTEXT t_above[4], t_left[4];
765 MODE_INFO *const mic = xd->mode_info_context;
767 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
768 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
770 bmode_costs = mb->mbmode_cost;
772 for (idy = 0; idy < 2; idy += bh) {
773 for (idx = 0; idx < 2; idx += bw) {
774 const int mis = xd->mode_info_stride;
775 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
776 int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
777 int UNINITIALIZED_IS_SAFE(d);
// Key frames cost the mode conditioned on the above/left block modes.
780 if (xd->frame_type == KEY_FRAME) {
781 const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
782 const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
783 left_block_mode(mic, i) : DC_PRED;
785 bmode_costs = mb->y_mode_costs[A][L];
788 total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
789 t_above + idx, t_left + idy,
// Propagate the winning mode to all bmi entries this unit covers.
795 mic->bmi[i].as_mode.first = best_mode;
796 for (j = 1; j < bh; ++j)
797 mic->bmi[i + j * 2].as_mode.first = best_mode;
798 for (j = 1; j < bw; ++j)
799 mic->bmi[i + j].as_mode.first = best_mode;
801 if (total_rd >= best_rd)
806 if (total_rd >= best_rd)
810 *rate_y = tot_rate_y;
811 *Distortion = distortion;
812 xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;
814 return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
// Full intra luma mode search for a superblock: for each mode DC..TM it
// runs super_block_yrd(), keeps the RD best (mode + tx size + outputs),
// and maintains a per-txfm-mode adjusted-RD cache relative to the frame's
// current txfm_mode.  Returns the best RD cost.
// NOTE(review): garbled extraction -- the local `i`, the sub-8x8 early
// path's body/return, best-case bookkeeping lines, the final return and
// several closing braces are missing from this view.
817 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
818 int *rate, int *rate_tokenonly,
819 int *distortion, int *skippable,
820 BLOCK_SIZE_TYPE bsize,
821 int64_t txfm_cache[NB_TXFM_MODES]) {
822 MB_PREDICTION_MODE mode;
823 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
824 MACROBLOCKD *const xd = &x->e_mbd;
825 int this_rate, this_rate_tokenonly;
826 int this_distortion, s;
827 int64_t best_rd = INT64_MAX, this_rd;
828 TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
830 int *bmode_costs = x->mbmode_cost;
// Sub-8x8 blocks use the 4x4 path exclusively.
832 if (bsize < BLOCK_SIZE_SB8X8) {
833 x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
837 for (i = 0; i < NB_TXFM_MODES; i++)
838 txfm_cache[i] = INT64_MAX;
840 /* Y Search for 32x32 intra prediction mode */
841 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
842 int64_t local_txfm_cache[NB_TXFM_MODES];
843 MODE_INFO *const mic = xd->mode_info_context;
844 const int mis = xd->mode_info_stride;
846 if (cpi->common.frame_type == KEY_FRAME) {
847 const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
848 const MB_PREDICTION_MODE L = xd->left_available ?
849 left_block_mode(mic, 0) : DC_PRED;
851 bmode_costs = x->y_mode_costs[A][L];
853 x->e_mbd.mode_info_context->mbmi.mode = mode;
855 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
856 bsize, local_txfm_cache);
858 this_rate = this_rate_tokenonly + bmode_costs[mode];
859 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
861 if (this_rd < best_rd) {
862 mode_selected = mode;
864 best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size;
866 *rate_tokenonly = this_rate_tokenonly;
867 *distortion = this_distortion;
// Track the best RD achievable under each txfm_mode, adjusted so all
// entries are comparable to the currently selected mode.
871 for (i = 0; i < NB_TXFM_MODES; i++) {
872 int64_t adj_rd = this_rd + local_txfm_cache[i] -
873 local_txfm_cache[cpi->common.txfm_mode];
874 if (adj_rd < txfm_cache[i]) {
875 txfm_cache[i] = adj_rd;
880 x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
881 x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;
// Chroma counterpart of super_block_yrd_for_txfm(): encode (intra) or
// transform-quantize (inter) the UV planes, then measure distortion, token
// rate and skippability at the given UV transform size.
// NOTE(review): garbled extraction -- the `else` introducing the inter
// path (vp9_xform_quant_sbuv) and the closing brace are missing from this
// view.
886 static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
887 int *rate, int *distortion,
888 int *skippable, BLOCK_SIZE_TYPE bsize,
889 TX_SIZE uv_tx_size) {
890 MACROBLOCKD *const xd = &x->e_mbd;
891 if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
892 vp9_encode_intra_block_uv(cm, x, bsize);
894 vp9_xform_quant_sbuv(cm, x, bsize);
896 *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2);
897 *rate = rdcost_uv(cm, x, bsize, uv_tx_size);
898 *skippable = vp9_sbuv_is_skippable(xd, bsize);
// Chroma RD evaluation: subtracts the prediction (inter), then dispatches
// to super_block_uvrd_for_txfm() at the UV transform size implied by the
// luma tx size and the block size.
// NOTE(review): garbled extraction -- each branch's TX_* size argument,
// the final else, and the closing braces are missing from this view.
901 static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
902 int *rate, int *distortion, int *skippable,
903 BLOCK_SIZE_TYPE bsize) {
904 MACROBLOCKD *const xd = &x->e_mbd;
905 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
907 if (mbmi->ref_frame > INTRA_FRAME)
908 vp9_subtract_sbuv(x, bsize);
910 if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) {
911 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
913 } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) {
914 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
916 } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) {
917 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
920 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
// Intra chroma mode search: tries each UV mode DC..TM, evaluates with
// super_block_uvrd(), and keeps the RD best mode and its outputs.
// Returns the best RD cost.
// NOTE(review): garbled extraction -- best-case bookkeeping lines
// (best_rd/*rate/*skippable updates), the final return and closing braces
// are missing from this view.
925 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
926 int *rate, int *rate_tokenonly,
927 int *distortion, int *skippable,
928 BLOCK_SIZE_TYPE bsize) {
929 MB_PREDICTION_MODE mode;
930 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
931 int64_t best_rd = INT64_MAX, this_rd;
932 int this_rate_tokenonly, this_rate;
933 int this_distortion, s;
935 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
936 x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
937 super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
938 &this_distortion, &s, bsize);
939 this_rate = this_rate_tokenonly +
940 x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
941 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
943 if (this_rd < best_rd) {
944 mode_selected = mode;
947 *rate_tokenonly = this_rate_tokenonly;
948 *distortion = this_distortion;
953 x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
// Bit cost of signaling inter mode `m` given the MV reference context.
// Returns 0 (implicitly, in the dropped tail) when the segment forces
// skip, since no mode is coded in that case.
// NOTE(review): garbled extraction -- the else/return-0 tail and closing
// braces are missing from this view.
958 int vp9_cost_mv_ref(VP9_COMP *cpi,
959 MB_PREDICTION_MODE m,
960 const int mode_context) {
961 MACROBLOCKD *xd = &cpi->mb.e_mbd;
962 int segment_id = xd->mode_info_context->mbmi.segment_id;
964 // Dont account for mode here if segment skip is enabled.
965 if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
966 VP9_COMMON *pc = &cpi->common;
968 vp9_prob p[VP9_INTER_MODES - 1];
969 assert(NEARESTMV <= m && m <= NEWMV);
970 vp9_mv_ref_probs(pc, p, mode_context);
971 return cost_token(vp9_sb_mv_ref_tree, p,
972 vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
977 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
978 x->e_mbd.mode_info_context->mbmi.mode = mb;
979 x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
// For one SPLITMV label: selects the motion vector(s) implied by
// this_mode (NEWMV uses seg_mvs and adds MV bit cost; NEAREST/NEAR use
// frame_mv; ZERO uses 0), adds the mode signaling cost, writes the result
// into bmi/partition_info, and replicates it across the covered sub-block
// entries.  Returns the accumulated cost.
// NOTE(review): garbled extraction -- case labels of the switch, the
// best_ref_mv parameter, idx/idy declarations, the `cost += thismvcost`
// / return tail and closing braces are missing from this view.
982 static int labels2mode(MACROBLOCK *x, int i,
983 MB_PREDICTION_MODE this_mode,
984 int_mv *this_mv, int_mv *this_second_mv,
985 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
986 int_mv seg_mvs[MAX_REF_FRAMES],
988 int_mv *second_best_ref_mv,
989 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
990 MACROBLOCKD *const xd = &x->e_mbd;
991 MODE_INFO *const mic = xd->mode_info_context;
992 MB_MODE_INFO * mbmi = &mic->mbmi;
993 int cost = 0, thismvcost = 0;
995 int bw = 1 << b_width_log2(mbmi->sb_type);
996 int bh = 1 << b_height_log2(mbmi->sb_type);
998 /* We have to be careful retrieving previously-encoded motion vectors.
999 Ones from this macroblock have to be pulled from the BLOCKD array
1000 as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1001 MB_PREDICTION_MODE m;
1003 // the only time we should do costing for new motion vector or mode
1004 // is when we are on a new label (jbb May 08, 2007)
1005 switch (m = this_mode) {
// NEWMV: take the searched MV and pay its bit cost.
1007 this_mv->as_int = seg_mvs[mbmi->ref_frame].as_int;
1008 thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
1009 102, xd->allow_high_precision_mv);
1010 if (mbmi->second_ref_frame > 0) {
1011 this_second_mv->as_int = seg_mvs[mbmi->second_ref_frame].as_int;
1012 thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
1013 mvjcost, mvcost, 102,
1014 xd->allow_high_precision_mv);
// NEARESTMV: reuse the nearest candidate MV.
1018 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame].as_int;
1019 if (mbmi->second_ref_frame > 0)
1020 this_second_mv->as_int =
1021 frame_mv[NEARESTMV][mbmi->second_ref_frame].as_int;
// NEARMV: reuse the near candidate MV.
1024 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame].as_int;
1025 if (mbmi->second_ref_frame > 0)
1026 this_second_mv->as_int =
1027 frame_mv[NEARMV][mbmi->second_ref_frame].as_int;
// ZEROMV: zero motion.
1030 this_mv->as_int = 0;
1031 if (mbmi->second_ref_frame > 0)
1032 this_second_mv->as_int = 0;
1038 cost = vp9_cost_mv_ref(cpi, this_mode,
1039 mbmi->mb_mode_context[mbmi->ref_frame]);
1041 mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1042 if (mbmi->second_ref_frame > 0)
1043 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1045 x->partition_info->bmi[i].mode = m;
1046 x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
1047 if (mbmi->second_ref_frame > 0)
1048 x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
// Replicate the decision into every bmi entry this label covers.
1049 for (idy = 0; idy < bh; ++idy) {
1050 for (idx = 0; idx < bw; ++idx) {
1051 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1052 &mic->bmi[i], sizeof(mic->bmi[i]));
1053 vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
1054 &x->partition_info->bmi[i],
1055 sizeof(x->partition_info->bmi[i]));
// Encodes one SPLITMV segment: builds the (possibly compound) inter
// prediction for the sub-block, subtracts it, transform/quantizes each
// 4x4 unit, and accumulates distortion and token rate.  Returns the
// segment's RDCOST.
// NOTE(review): garbled extraction -- several parameters (x, i, labelyrate,
// distortion), locals (k, idx/idy, thisrate), dst argument lines,
// `>> 2` distortion scaling and closing braces are missing from this view.
1063 static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
1068 ENTROPY_CONTEXT *ta,
1069 ENTROPY_CONTEXT *tl) {
1071 MACROBLOCKD *xd = &x->e_mbd;
1072 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
1073 int bwl = b_width_log2(bsize), bw = 1 << bwl;
1074 int bhl = b_height_log2(bsize), bh = 1 << bhl;
1076 const int src_stride = x->plane[0].src.stride;
1077 uint8_t* const src =
1078 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1079 x->plane[0].src.buf, src_stride);
1081 raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
1082 x->plane[0].src_diff);
1083 int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
1084 uint8_t* const pre =
1085 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1086 xd->plane[0].pre[0].buf,
1087 xd->plane[0].pre[0].stride);
1088 uint8_t* const dst =
1089 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1090 xd->plane[0].dst.buf,
1091 xd->plane[0].dst.stride);
1092 int thisdistortion = 0;
// First (or only) reference prediction.
1098 vp9_build_inter_predictor(pre,
1099 xd->plane[0].pre[0].stride,
1101 xd->plane[0].dst.stride,
1102 &xd->mode_info_context->bmi[i].as_mv[0],
1103 &xd->scale_factor[0],
1104 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix);
1106 // TODO(debargha): Make this work properly with the
1107 // implicit-compoundinter-weight experiment when implicit
1108 // weighting for splitmv modes is turned on.
1109 if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
1110 uint8_t* const second_pre =
1111 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1112 xd->plane[0].pre[1].buf,
1113 xd->plane[0].pre[1].stride);
1114 vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
1115 dst, xd->plane[0].dst.stride,
1116 &xd->mode_info_context->bmi[i].as_mv[1],
1117 &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
1121 vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
1123 dst, xd->plane[0].dst.stride);
// Transform/quantize each covered 4x4 unit, accumulating rate/distortion.
1126 for (idy = 0; idy < bh; ++idy) {
1127 for (idx = 0; idx < bw; ++idx) {
1128 k += (idy * 2 + idx);
1129 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
1130 x->plane[0].src_diff);
1131 coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
1132 x->fwd_txm4x4(src_diff, coeff, 16);
1133 x->quantize_b_4x4(x, k, DCT_DCT, 16);
1134 thisdistortion += vp9_block_error(coeff,
1135 BLOCK_OFFSET(xd->plane[0].dqcoeff,
1137 thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
1139 tl + (k >> 1), TX_4X4, 16);
1142 *distortion += thisdistortion;
1143 *labelyrate += thisrate;
1146 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1150 int_mv *ref_mv, *second_ref_mv;
1157 MB_PREDICTION_MODE modes[4];
1158 int_mv mvs[4], second_mvs[4];
/*
 * Returns nonzero if mv falls outside the MACROBLOCK's allowed motion
 * range.  mv is in 1/8-pel units, hence the >> 3 to convert to full-pel
 * before comparing against the full-pel min/max bounds.
 */
1163 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1165   r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1166   r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1167   r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1168   r |= (mv->as_mv.col >> 3) > x->mv_col_max;
/*
 * Maps a block's pixel dimensions (bw x bh) to the corresponding
 * enum BlockSize value used to index cpi->fn_ptr (variance/SAD
 * function tables).  NOTE(review): the return statements fall in
 * elided lines of this excerpt; each condition presumably returns the
 * matching BLOCK_* enum — confirm against the full source.
 */
1172 static enum BlockSize get_block_size(int bw, int bh) {
1173   if (bw == 4 && bh == 4)
1176   if (bw == 4 && bh == 8)
1179   if (bw == 8 && bh == 4)
1182   if (bw == 8 && bh == 8)
1185   if (bw == 8 && bh == 16)
1188   if (bw == 16 && bh == 8)
1191   if (bw == 16 && bh == 16)
1194   if (bw == 32 && bh == 32)
1197   if (bw == 32 && bh == 16)
1200   if (bw == 16 && bh == 32)
1203   if (bw == 64 && bh == 32)
1206   if (bw == 32 && bh == 64)
1209   if (bw == 64 && bh == 64)
/*
 * Advances the plane-0 source and prediction buffer pointers so they
 * address sub-block i of the current 8x8 block (used before per-sub-
 * block motion search).  The second reference's pre buffer is shifted
 * too when compound prediction is active.  Callers save the original
 * buf_2d values and undo this with mi_buf_restore().
 */
1216 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1217   MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1218   x->plane[0].src.buf =
1219       raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1220                                 x->plane[0].src.buf,
1221                                 x->plane[0].src.stride);
// The prediction buffer is expected to be 8-byte aligned before offsetting.
1222   assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
1223   x->e_mbd.plane[0].pre[0].buf =
1224       raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1225                                 x->e_mbd.plane[0].pre[0].buf,
1226                                 x->e_mbd.plane[0].pre[0].stride);
1227   if (mbmi->second_ref_frame)
1228     x->e_mbd.plane[0].pre[1].buf =
1229         raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1230                                   x->e_mbd.plane[0].pre[1].buf,
1231                                   x->e_mbd.plane[0].pre[1].stride);
/*
 * Undoes mi_buf_shift(): restores the plane-0 source buffer and the
 * (one or two) prediction buffers from the saved copies.
 */
1234 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1235                                   struct buf_2d orig_pre[2]) {
1236   MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1237   x->plane[0].src = orig_src;
1238   x->e_mbd.plane[0].pre[0] = orig_pre[0];
1239   if (mbmi->second_ref_frame)
1240     x->e_mbd.plane[0].pre[1] = orig_pre[1];
// Forward declaration — defined further down in this file; needed here
// because rd_check_segment_txsize() calls it for compound NEWMV search.
1243 static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1244                                     BLOCK_SIZE_TYPE bsize,
1246                                     YV12_BUFFER_CONFIG **scaled_ref_frame,
1247                                     int mi_row, int mi_col,
1248                                     int_mv single_newmv[MAX_REF_FRAMES]);
/*
 * Rate-distortion search over the sub-blocks ("labels") of an 8x8
 * partition.  For each label it evaluates the inter modes NEARESTMV
 * through NEWMV (running a diamond + optional full motion search for
 * single-reference NEWMV, and an iterative joint search for compound
 * NEWMV), picks the per-label winner, and accumulates segment rate and
 * RD cost.  If the total beats bsi->segment_rd, the winning MVs, modes
 * and eob counts are stored back into *bsi.
 */
1250 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1252                                     int_mv seg_mvs[4][MAX_REF_FRAMES],
1253                                     int mi_row, int mi_col) {
1256   MB_PREDICTION_MODE this_mode;
1257   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
1258   const int label_count = 4;
1259   int64_t this_segment_rd = 0, other_segment_rd;
1260   int label_mv_thresh;
1262   int sbr = 0, sbd = 0;
1263   int segmentyrate = 0;
1264   int best_eobs[4] = { 0 };
1265   BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
1266   int bwl = b_width_log2(bsize), bw = 1 << bwl;
1267   int bhl = b_height_log2(bsize), bh = 1 << bhl;
1269   vp9_variance_fn_ptr_t *v_fn_ptr;
1270   YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
// t_above/t_left: working entropy contexts; t_above_b/t_left_b hold the
// contexts of the best mode found so far for the current label.
1271   ENTROPY_CONTEXT t_above[4], t_left[4];
1272   ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
1274   vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
1275   vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
1277   v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
1279   // 64 makes this threshold really big effectively
1280   // making it so that we very rarely check mvs on
1281   // segments. setting this to 1 would make mv thresh
1282   // roughly equal to what it is for macroblocks
1283   label_mv_thresh = 1 * bsi->mvthresh / label_count;
1285   // Segmentation method overheads
1286   other_segment_rd = this_segment_rd;
// Iterate over the labels of the 8x8 block; the idy/idx strides (bh/bw)
// skip positions covered by wider/taller sub-blocks.
1288   for (idy = 0; idy < 2; idy += bh) {
1289     for (idx = 0; idx < 2; idx += bw) {
1290       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1291       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1292       int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1293       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1294       int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
1295       MB_PREDICTION_MODE mode_selected = ZEROMV;
1296       int bestlabelyrate = 0;
// Seed the candidate MV table: ZEROMV is (0,0); nearest/near come from
// neighbouring sub-block MVs for each active reference frame.
1299       frame_mv[ZEROMV][mbmi->ref_frame].as_int = 0;
1300       frame_mv[ZEROMV][mbmi->second_ref_frame].as_int = 0;
1301       vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1302                                     &frame_mv[NEARESTMV][mbmi->ref_frame],
1303                                     &frame_mv[NEARMV][mbmi->ref_frame],
1305       if (mbmi->second_ref_frame > 0)
1306         vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1307                               &frame_mv[NEARESTMV][mbmi->second_ref_frame],
1308                               &frame_mv[NEARMV][mbmi->second_ref_frame],
1311       // search for the best motion vector on this segment
1312       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1316         ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
// Save src/pre pointers and entropy contexts so each trial mode starts
// from the same state.
1317         const struct buf_2d orig_src = x->plane[0].src;
1318         struct buf_2d orig_pre[2];
1320         vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
1322         vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
1323         vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
1325         // motion search for newmv (single predictor case only)
1326         if (mbmi->second_ref_frame <= 0 && this_mode == NEWMV) {
1329           int thissme, bestsme = INT_MAX;
1330           int sadpb = x->sadperbit4;
1333           /* Is the best so far sufficiently good that we cant justify doing
1334            * and new motion search. */
1335           if (best_label_rd < label_mv_thresh)
1338           if (cpi->compressor_speed) {
1339             // use previous block's result as next block's MV predictor.
1342                   x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
1345                   x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
1350           further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
// MV predictor is converted from 1/8-pel to full-pel for the search.
1352           mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1353           mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1355           // adjust src pointer for this block
1357           bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1358                                            sadpb, further_steps, 0, v_fn_ptr,
1359                                            bsi->ref_mv, &mode_mv[NEWMV]);
1361           // Should we do a full search (best quality only)
1362           if (cpi->compressor_speed == 0) {
1363             /* Check if mvp_full is within the range. */
1364             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1365                      x->mv_row_min, x->mv_row_max);
1367             thissme = cpi->full_search_sad(x, &mvp_full,
1368                                            sadpb, 16, v_fn_ptr,
1369                                            x->nmvjointcost, x->mvcost,
1372             if (thissme < bestsme) {
1374               mode_mv[NEWMV].as_int =
1375                   x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
1377               /* The full search result is actually worse so re-instate the
1378                * previous best vector */
1379               x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
1380                   mode_mv[NEWMV].as_int;
// Refine the full-pel winner to sub-pel precision.
1384           if (bestsme < INT_MAX) {
1387             cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
1388                                          bsi->ref_mv, x->errorperbit, v_fn_ptr,
1389                                          x->nmvjointcost, x->mvcost,
1392           // safe motion search result for use in compound prediction
1393           seg_mvs[i][mbmi->ref_frame].as_int = mode_mv[NEWMV].as_int;
1396           // restore src pointers
1397           mi_buf_restore(x, orig_src, orig_pre);
1398         } else if (mbmi->second_ref_frame > 0 && this_mode == NEWMV) {
// Compound NEWMV requires both single-reference MVs to be valid.
1399           if (seg_mvs[i][mbmi->second_ref_frame].as_int == INVALID_MV ||
1400               seg_mvs[i][mbmi->ref_frame    ].as_int == INVALID_MV)
1403           // adjust src pointers
1405           if (cpi->sf.comp_inter_joint_search_thresh < bsize) {
1406             iterative_motion_search(cpi, x, bsize, frame_mv[this_mode],
1408                                     mi_row, mi_col, seg_mvs[i]);
1409             seg_mvs[i][mbmi->ref_frame].as_int =
1410                 frame_mv[this_mode][mbmi->ref_frame].as_int;
1411             seg_mvs[i][mbmi->second_ref_frame].as_int =
1412                 frame_mv[this_mode][mbmi->second_ref_frame].as_int;
1414           // restore src pointers
1415           mi_buf_restore(x, orig_src, orig_pre);
// Cost of signalling this mode + its MV(s).
1418         rate = labels2mode(x, i, this_mode, &mode_mv[this_mode],
1419                            &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1420                            bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1423         // Trap vectors that reach beyond the UMV borders
1424         if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
1425             ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
1426             ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
1427             ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
1430         if (mbmi->second_ref_frame > 0 &&
1431             mv_check_bounds(x, &second_mode_mv[this_mode]))
1434         this_rd = encode_inter_mb_segment(&cpi->common,
1436                                           &distortion, t_above_s, t_left_s);
1437         this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
1440         if (this_rd < best_label_rd) {
// New per-label winner: record its rate, mode, eobs and the entropy
// contexts it produced.
1443           bestlabelyrate = labelyrate;
1444           mode_selected = this_mode;
1445           best_label_rd = this_rd;
1446           best_eobs[i] = x->e_mbd.plane[0].eobs[i];
1447           vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
1448           vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
1450       } /*for each 4x4 mode*/
// Commit the winning mode's entropy contexts and re-apply its MVs/mode.
1452       vpx_memcpy(t_above, t_above_b, sizeof(t_above));
1453       vpx_memcpy(t_left, t_left_b, sizeof(t_left));
1455       labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
1456                   &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
1457                   bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1462       segmentyrate += bestlabelyrate;
1463       this_segment_rd += best_label_rd;
1464       other_segment_rd += best_other_rd;
// Replicate the winner into the partition_info entries this sub-block
// spans (rows then columns) for non-4x4 sub-block shapes.
1466       for (j = 1; j < bh; ++j)
1467         vpx_memcpy(&x->partition_info->bmi[i + j * 2],
1468                    &x->partition_info->bmi[i],
1469                    sizeof(x->partition_info->bmi[i]));
1470       for (j = 1; j < bw; ++j)
1471         vpx_memcpy(&x->partition_info->bmi[i + j],
1472                    &x->partition_info->bmi[i],
1473                    sizeof(x->partition_info->bmi[i]));
1475   } /* for each label */
1477   if (this_segment_rd < bsi->segment_rd) {
1480     bsi->segment_yrate = segmentyrate;
1481     bsi->segment_rd = this_segment_rd;
1483     // store everything needed to come back to this!!
1484     for (i = 0; i < 4; i++) {
1485       bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
1486       if (mbmi->second_ref_frame > 0)
1487         bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
1488       bsi->modes[i] = x->partition_info->bmi[i].mode;
1489       bsi->eobs[i] = best_eobs[i];
/*
 * Entry point for 8x8 split-mode RD selection.  Initializes a
 * BEST_SEG_INFO with the caller's reference MVs and RD threshold, runs
 * rd_check_segment_txsize() to find the best per-sub-block modes/MVs,
 * then copies the winners into mode_info/partition_info, fills in the
 * output rate/distortion/skippable values, and returns the winning
 * segment RD cost (truncated to int).
 */
1494 static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
1495                                        int_mv *best_ref_mv,
1496                                        int_mv *second_best_ref_mv,
1500                                        int *returndistortion,
1501                                        int *skippable, int mvthresh,
1502                                        int_mv seg_mvs[4][MAX_REF_FRAMES],
1503                                        int mi_row, int mi_col) {
1506   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
1508   vpx_memset(&bsi, 0, sizeof(bsi));
// Seed the search state: current best RD acts as the pruning bound,
// and the best reference MV doubles as the initial MV predictor.
1510   bsi.segment_rd = best_rd;
1511   bsi.ref_mv = best_ref_mv;
1512   bsi.second_ref_mv = second_best_ref_mv;
1513   bsi.mvp.as_int = best_ref_mv->as_int;
1514   bsi.mvthresh = mvthresh;
1516   for (i = 0; i < 4; i++)
1517     bsi.modes[i] = ZEROMV;
1519   rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col);
1521   /* set it to the best */
1522   for (i = 0; i < 4; i++) {
1523     x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int;
1524     if (mbmi->second_ref_frame > 0)
1525       x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int =
1526       bsi.second_mvs[i].as_int;
1527     x->e_mbd.plane[0].eobs[i] = bsi.eobs[i];
1530   /* save partitions */
1531   x->partition_info->count = 4;
1533   for (i = 0; i < x->partition_info->count; i++) {
1534     x->partition_info->bmi[i].mode = bsi.modes[i];
1535     x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv;
1536     if (mbmi->second_ref_frame > 0)
1537       x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv;
// The last (index 3) sub-block's MV/mode represents the whole block
1540    * used to set mbmi->mv.as_int
1542   x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int;
1543   if (mbmi->second_ref_frame > 0)
1544     x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int;
1546   *returntotrate = bsi.r;
1547   *returndistortion = bsi.d;
1548   *returnyrate = bsi.segment_yrate;
1549   *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
1550   mbmi->mode = bsi.modes[3];
1552   return (int)(bsi.segment_rd);
/*
 * Scans the candidate reference-MV list for ref_frame and, for each
 * candidate, measures the SAD between the source block and the
 * reference block the candidate points at.  The index of the
 * lowest-SAD candidate is stored in x->mv_best_ref_index[ref_frame]
 * to be used as the centre point for subsequent motion searches.
 */
1555 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
1556                     uint8_t *ref_y_buffer, int ref_y_stride,
1557                     int ref_frame, enum BlockSize block_size ) {
1558   MACROBLOCKD *xd = &x->e_mbd;
1559   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
1564   int best_sad = INT_MAX;
1565   int this_sad = INT_MAX;
1567   uint8_t *src_y_ptr = x->plane[0].src.buf;
1569   int row_offset, col_offset;
1571   // Get the sad for each candidate reference mv
1572   for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
1573     this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
1575     // The list is at an end if we see 0 for a second time.
1576     if (!this_mv.as_int && zero_seen)
1578     zero_seen = zero_seen || !this_mv.as_int;
// Convert the 1/8-pel candidate to a full-pel offset into the
// reference buffer.
1580     row_offset = this_mv.as_mv.row >> 3;
1581     col_offset = this_mv.as_mv.col >> 3;
1582     ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
1584     // Find sad for current vector.
1585     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
1586                                            ref_y_ptr, ref_y_stride,
1589     // Note if it is the best so far.
1590     if (this_sad < best_sad) {
1591       best_sad = this_sad;
1596   // Note the index of the mv that worked best in the reference list.
1597   x->mv_best_ref_index[ref_frame] = best_index;
1600 extern void vp9_calc_ref_probs(int *count, vp9_prob *probs);
/*
 * Estimates the current frame's reference-frame probabilities for the
 * case where reference-frame prediction fails.  The counts of the
 * predicted reference are excluded from the normalization, and the
 * probability entry corresponding to the branch made implicit by the
 * failed prediction is zeroed (weighted_cost() treats a zero entry as
 * free).
 */
1601 static void estimate_curframe_refprobs(VP9_COMP *cpi,
1602                                        vp9_prob mod_refprobs[3],
1604   int norm_cnt[MAX_REF_FRAMES];
1605   const int *const rfct = cpi->count_mb_ref_frame_usage;
1606   int intra_count = rfct[INTRA_FRAME];
1607   int last_count = rfct[LAST_FRAME];
1608   int gf_count = rfct[GOLDEN_FRAME];
1609   int arf_count = rfct[ALTREF_FRAME];
1611   // Work out modified reference frame probabilities to use where prediction
1612   // of the reference frame fails
1613   if (pred_ref == INTRA_FRAME) {
1615     norm_cnt[1] = last_count;
1616     norm_cnt[2] = gf_count;
1617     norm_cnt[3] = arf_count;
1618     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1619     mod_refprobs[0] = 0;    // This branch implicit
1620   } else if (pred_ref == LAST_FRAME) {
1621     norm_cnt[0] = intra_count;
1623     norm_cnt[2] = gf_count;
1624     norm_cnt[3] = arf_count;
1625     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1626     mod_refprobs[1] = 0;    // This branch implicit
1627   } else if (pred_ref == GOLDEN_FRAME) {
1628     norm_cnt[0] = intra_count;
1629     norm_cnt[1] = last_count;
1631     norm_cnt[3] = arf_count;
1632     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1633     mod_refprobs[2] = 0;  // This branch implicit
// Final branch: pred_ref is ALTREF_FRAME.  NOTE(review): index 2 is
// zeroed here as in the GOLDEN branch — bit 2 selects golden vs altref,
// so the same entry is implicit for both; confirm against the spec.
1635     norm_cnt[0] = intra_count;
1636     norm_cnt[1] = last_count;
1637     norm_cnt[2] = gf_count;
1639     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1640     mod_refprobs[2] = 0;  // This branch implicit
1644 static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1,
1645 int idx, int val, int weight) {
1646 unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0;
1647 unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0;
1648 // weight is 16-bit fixed point, so this basically calculates:
1649 // 0.5 + weight * cost1 + (1.0 - weight) * cost0
1650 return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16;
/*
 * Fills ref_costs[] with an estimated bit cost for selecting each
 * possible reference frame in the current segment.  The cost blends
 * last-frame probabilities with probabilities predicted from this
 * frame's statistics so far (via weighted_cost, weighted by
 * cpi->seg0_progress), and adds the cost of the ref-frame tree bits
 * for mispredicted cases.
 */
1653 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
1654                                      unsigned int *ref_costs) {
1655   VP9_COMMON *cm = &cpi->common;
1656   MACROBLOCKD *xd = &cpi->mb.e_mbd;
1657   vp9_prob *mod_refprobs;
1665   vp9_prob pred_prob, new_pred_prob;
1667   int seg_ref_count = 0;
1668   seg_ref_active = vp9_segfeature_active(xd,
// When segment-level reference restriction is active, count how many
// reference frames the segment actually allows.
1672   if (seg_ref_active) {
1673     seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) +
1674                     vp9_check_segref(xd, segment_id, LAST_FRAME) +
1675                     vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
1676                     vp9_check_segref(xd, segment_id, ALTREF_FRAME);
1679   // Get the predicted reference for this mb
1680   pred_ref = vp9_get_pred_ref(cm, xd);
1682   // Get the context probability for the prediction flag (based on last frame)
1683   pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
1685   // Predict probability for current frame based on stats so far
1686   pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF);
1687   new_pred_prob = get_binary_prob(cpi->ref_pred_count[pred_ctx][0],
1688                                   cpi->ref_pred_count[pred_ctx][1]);
1690   // Get the set of probabilities to use if prediction fails
1691   mod_refprobs = cm->mod_refprobs[pred_ref];
1693   // For each possible selected reference frame work out a cost.
1694   for (i = 0; i < MAX_REF_FRAMES; i++) {
// A segment allowing only one reference frame needs no signalling bits.
1695     if (seg_ref_active && seg_ref_count == 1) {
1698       pred_flag = (i == pred_ref);
1700       // Get the prediction for the current mb
1701       cost = weighted_cost(&pred_prob, &new_pred_prob, 0,
1702                            pred_flag, cpi->seg0_progress);
1703       if (cost > 1024) cost = 768; // i.e. account for 4 bits max.
1705       // for incorrectly predicted cases
1707         vp9_prob curframe_mod_refprobs[3];
1709         if (cpi->seg0_progress) {
1710           estimate_curframe_refprobs(cpi, curframe_mod_refprobs, pred_ref);
1712           vpx_memset(curframe_mod_refprobs, 0, sizeof(curframe_mod_refprobs));
// Walk the reference-frame tree: intra/inter, then last vs rest, then
// golden vs altref, accumulating each bit's blended cost.
1715         cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 0,
1716                               (i != INTRA_FRAME), cpi->seg0_progress);
1717         if (i != INTRA_FRAME) {
1718           cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 1,
1719                                 (i != LAST_FRAME), cpi->seg0_progress);
1720           if (i != LAST_FRAME) {
1721             cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 2,
1722                                   (i != GOLDEN_FRAME), cpi->seg0_progress);
1728     ref_costs[i] = cost;
/*
 * Snapshots the current coding decision (mode info, partition info,
 * reference MVs, prediction-type and transform-size RD deltas) into
 * ctx so the encoder can restore and re-encode this block later if
 * this mode is ultimately chosen.
 */
1732 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1734                                  PARTITION_INFO *partition,
1736                                  int_mv *second_ref_mv,
1737                                  int64_t comp_pred_diff[NB_PREDICTION_TYPES],
1738                                  int64_t txfm_size_diff[NB_TXFM_MODES]) {
1739   MACROBLOCKD *const xd = &x->e_mbd;
1741   // Take a snapshot of the coding context so it can be
1742   // restored if we decide to encode this way
1743   ctx->skip = x->skip;
1744   ctx->best_mode_index = mode_index;
1745   ctx->mic = *xd->mode_info_context;
1748     ctx->partition_info = *partition;
1750   ctx->best_ref_mv.as_int = ref_mv->as_int;
1751   ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
// RD deltas per prediction type, narrowed to int for storage.
1753   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
1754   ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
1755   ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
1757   memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
/*
 * Points the per-plane prediction buffers (dst[]) at the planes of the
 * given YV12 source frame, then offsets each to the (mi_row, mi_col)
 * position using the per-plane scale factors and subsampling.
 */
1760 static void setup_pred_block(const MACROBLOCKD *xd,
1761                              struct buf_2d dst[MAX_MB_PLANE],
1762                              const YV12_BUFFER_CONFIG *src,
1763                              int mi_row, int mi_col,
1764                              const struct scale_factors *scale,
1765                              const struct scale_factors *scale_uv) {
1768   dst[0].buf = src->y_buffer;
1769   dst[0].stride = src->y_stride;
1770   dst[1].buf = src->u_buffer;
1771   dst[2].buf = src->v_buffer;
1772   dst[1].stride = dst[2].stride = src->uv_stride;
// Plane 3 (alpha) is only present in 4-plane builds.
1774   dst[3].buf = src->alpha_buffer;
1775   dst[3].stride = src->alpha_stride;
1778   // TODO(jkoleszar): Make scale factors per-plane data
1779   for (i = 0; i < MAX_MB_PLANE; i++) {
// Plane 0 (luma) uses `scale`; chroma (and alpha) use `scale_uv`.
1780     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
1781                      i ? scale_uv : scale,
1782                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
/*
 * Prepares everything needed to evaluate inter modes against one
 * reference frame: sets up the scale factors and prediction-plane
 * pointers for the reference, gathers and refines candidate MVs for
 * it, and (when no scaling is in effect) runs mv_pred() to pick the
 * best search centre.
 */
1786 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
1787                                int idx, MV_REFERENCE_FRAME frame_type,
1788                                enum BlockSize block_size,
1789                                int mi_row, int mi_col,
1790                                int_mv frame_nearest_mv[MAX_REF_FRAMES],
1791                                int_mv frame_near_mv[MAX_REF_FRAMES],
1792                                struct buf_2d yv12_mb[4][MAX_MB_PLANE],
1793                                struct scale_factors scale[MAX_REF_FRAMES]) {
1794   VP9_COMMON *cm = &cpi->common;
1795   YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
1796   MACROBLOCKD *const xd = &x->e_mbd;
1797   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1799   // set up scaling factors
1800   scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
// q4 sub-pel offsets of this mi position under the reference's scaling
// (& 0xf keeps the fractional 1/16-pel part).
1801   scale[frame_type].x_offset_q4 =
1802       (mi_col * MI_SIZE * scale[frame_type].x_num /
1803        scale[frame_type].x_den) & 0xf;
1804   scale[frame_type].y_offset_q4 =
1805       (mi_row * MI_SIZE * scale[frame_type].y_num /
1806        scale[frame_type].y_den) & 0xf;
1808   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
1809   // use the UV scaling factors.
1810   setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
1811                    &scale[frame_type], &scale[frame_type]);
1813   // Gets an initial list of candidate vectors from neighbours and orders them
1814   vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
1815                    xd->prev_mode_info_context,
1817                    mbmi->ref_mvs[frame_type],
1818                    cpi->common.ref_frame_sign_bias);
1820   // Candidate refinement carried out at encoder and decoder
1821   vp9_find_best_ref_mvs(xd,
1822                         mbmi->ref_mvs[frame_type],
1823                         &frame_nearest_mv[frame_type],
1824                         &frame_near_mv[frame_type]);
1826   // Further refinement that is encode side only to test the top few candidates
1827   // in full and choose the best as the centre point for subsequent searches.
1828   // The current implementation doesn't support scaling.
1829   if (scale[frame_type].x_num == scale[frame_type].x_den &&
1830       scale[frame_type].y_num == scale[frame_type].y_den)
1831     mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
1832             frame_type, block_size);
/*
 * Models rate (in units of 256 bits per sample, scaled by n) and
 * distortion for a Laplacian source of the given variance quantized
 * with a uniform quantizer of the given step size.  Piecewise
 * polynomial / logarithmic fits approximate the closed-form
 * expressions from Hang & Chen (see citation below).  NOTE(review):
 * the branch conditions selecting between the fits fall in elided
 * lines of this excerpt.
 */
1835 static void model_rd_from_var_lapndz(int var, int n, int qstep,
1836                                      int *rate, int *dist) {
1837   // This function models the rate and distortion for a Laplacian
1838   // source with given variance when quantized with a uniform quantizer
1839   // with given stepsize. The closed form expressions are in:
1840   // Hang and Chen, "Source Model for transform video coder and its
1841   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
1842   // Sys. for Video Tech., April 1997.
1843   // The function is implemented as piecewise approximation to the
1844   // exact computation.
1845   // TODO(debargha): Implement the functions by interpolating from a
1847   vp9_clear_system_state();
// s2 = per-sample variance; x = quantizer step in units of std dev.
1850     double s2 = (double) var / n;
1851     double s = sqrt(s2);
1852     double x = qstep / s;
1854       double y = exp(-x / 2);
1856       D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
1857       R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
1860       D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
1862         R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
1865         R = -1.442252874826093 * log(x) + 1.944647760719664;
// Scale back up by sample count; +0.5 rounds to nearest int.
1871     *rate = (n * R * 256 + 0.5);
1872     *dist = (n * D * s2 + 0.5);
1875   vp9_clear_system_state();
/*
 * Convenience wrapper: maps a BLOCK_SIZE_TYPE to the enum BlockSize of
 * the given plane, accounting for the plane's subsampling via
 * plane_block_width/height.
 */
1878 static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
1879                                            struct macroblockd_plane *pd) {
1880   return get_block_size(plane_block_width(bsize, pd),
1881                         plane_block_height(bsize, pd));
/*
 * Estimates total rate and distortion for a superblock without actually
 * transform-coding it: for every plane, measures the variance between
 * source and current prediction and feeds it through the Laplacian
 * rate-distortion model.  Results are summed over planes into
 * *out_rate_sum / *out_dist_sum.
 */
1884 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
1885                             MACROBLOCK *x, MACROBLOCKD *xd,
1886                             int *out_rate_sum, int *out_dist_sum) {
1887   // Note our transform coeffs are 8 times an orthogonal transform.
1888   // Hence quantizer step is also 8 times. To get effective quantizer
1889   // we need to divide by 8 before sending to modeling function.
1890   unsigned int sse, var;
1891   int i, rate_sum = 0, dist_sum = 0;
1893   for (i = 0; i < MAX_MB_PLANE; ++i) {
1894     struct macroblock_plane *const p = &x->plane[i];
1895     struct macroblockd_plane *const pd = &xd->plane[i];
1897     // TODO(dkovalev) the same code in get_plane_block_size
1898     const int bw = plane_block_width(bsize, pd);
1899     const int bh = plane_block_height(bsize, pd);
1900     const enum BlockSize bs = get_block_size(bw, bh);
1902     var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
1903                              pd->dst.buf, pd->dst.stride, &sse);
// dequant[1] is the AC dequantizer; >> 3 undoes the x8 coefficient scale.
1904     model_rd_from_var_lapndz(var, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
1910   *out_rate_sum = rate_sum;
1911   *out_dist_sum = dist_sum;
/*
 * Returns the (factor-weighted) bit cost of signalling the current
 * block's switchable interpolation filter choice, looked up by the
 * prediction context and the filter's index in the switchable map.
 */
1914 static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
1915   MACROBLOCKD *xd = &x->e_mbd;
1916   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1918   const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
1919   const int m = vp9_switchable_interp_map[mbmi->interp_filter];
1920   return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
/*
 * Joint (compound) motion search: alternately refines the motion
 * vector of one reference frame while holding the other fixed, using
 * the other reference's prediction as the "second_pred" target for
 * compound-aware SAD/sub-pel search.  Iterates up to 4 times, keeping
 * a refinement only if it improves on that reference's best error so
 * far.  Results are written back into frame_mv[].
 */
1923 static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1924                                     BLOCK_SIZE_TYPE bsize,
1926                                     YV12_BUFFER_CONFIG **scaled_ref_frame,
1927                                     int mi_row, int mi_col,
1928                                     int_mv single_newmv[MAX_REF_FRAMES]) {
1929   int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
1930   MACROBLOCKD *xd = &x->e_mbd;
1931   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
1932   int refs[2] = { mbmi->ref_frame,
1933     (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };
1935   const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
1937   // Prediction buffer from second frame.
1938   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
1940   // Do joint motion search in compound mode to get more accurate mv.
1941   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
1942   struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
1943   struct buf_2d scaled_first_yv12;
1944   int last_besterr[2] = {INT_MAX, INT_MAX};
1946   ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
1947   ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
1949   if (scaled_ref_frame[0]) {
1951     // Swap out the reference frame for a version that's been scaled to
1952     // match the resolution of the current frame, allowing the existing
1953     // motion search code to be used without additional modifications.
1954     for (i = 0; i < MAX_MB_PLANE; i++)
1955       backup_yv12[i] = xd->plane[i].pre[0];
1956     setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
1960   if (scaled_ref_frame[1]) {
1962     for (i = 0; i < MAX_MB_PLANE; i++)
1963       backup_second_yv12[i] = xd->plane[i].pre[1];
1965     setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
1969   xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
1971   xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
1973   scaled_first_yv12 = xd->plane[0].pre[0];
1975   // Initialize mv using single prediction mode result.
1976   frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
1977   frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
1979   // Allow joint search multiple times iteratively for each ref frame
1980   // and break out the search loop if it couldn't find better mv.
1981   for (ite = 0; ite < 4; ite++) {
1982     struct buf_2d ref_yv12[2];
1983     int bestsme = INT_MAX;
1984     int sadpb = x->sadperbit16;
1986     int search_range = 3;
// MV bounds are clamped per-iteration; save them for restoration below.
1988     int tmp_col_min = x->mv_col_min;
1989     int tmp_col_max = x->mv_col_max;
1990     int tmp_row_min = x->mv_row_min;
1991     int tmp_row_max = x->mv_row_max;
1994     // Initialized here because of compiler problem in Visual Studio.
1995     ref_yv12[0] = xd->plane[0].pre[0];
1996     ref_yv12[1] = xd->plane[0].pre[1];
1998     // Get pred block from second frame.
// `id` selects which reference is being refined; `!id` is the fixed one
// whose prediction becomes the compound target.
1999     vp9_build_inter_predictor(ref_yv12[!id].buf,
2000                               ref_yv12[!id].stride,
2002                               &frame_mv[refs[!id]],
2003                               &xd->scale_factor[!id],
2007     // Compound motion search on first ref frame.
2009     xd->plane[0].pre[0] = ref_yv12[id];
2010     vp9_clamp_mv_min_max(x, &ref_mv[id]);
2012     // Use mv result from single mode as mvp.
2013     tmp_mv.as_int = frame_mv[refs[id]].as_int;
// Convert 1/8-pel to full-pel for the integer search.
2015     tmp_mv.as_mv.col >>= 3;
2016     tmp_mv.as_mv.row >>= 3;
2018     // Small-range full-pixel motion search
2019     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2021                                        &cpi->fn_ptr[block_size],
2022                                        x->nmvjointcost, x->mvcost,
2023                                        &ref_mv[id], second_pred,
2026     x->mv_col_min = tmp_col_min;
2027     x->mv_col_max = tmp_col_max;
2028     x->mv_row_min = tmp_row_min;
2029     x->mv_row_max = tmp_row_max;
2031     if (bestsme < INT_MAX) {
2032       int dis; /* TODO: use dis in distortion calculation later. */
// Sub-pel refinement against the compound prediction.
2035       bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
2038                                              &cpi->fn_ptr[block_size],
2039                                              x->nmvjointcost, x->mvcost,
2040                                              &dis, &sse, second_pred,
2045     xd->plane[0].pre[0] = scaled_first_yv12;
2047     if (bestsme < last_besterr[id]) {
2048       frame_mv[refs[id]].as_int = tmp_mv.as_int;
2049       last_besterr[id] = bestsme;
2055   // restore the predictor
2056   if (scaled_ref_frame[0]) {
2058     for (i = 0; i < MAX_MB_PLANE; i++)
2059       xd->plane[i].pre[0] = backup_yv12[i];
2062   if (scaled_ref_frame[1]) {
2064     for (i = 0; i < MAX_MB_PLANE; i++)
2065       xd->plane[i].pre[1] = backup_second_yv12[i];
// Free the aligned compound-prediction scratch buffer.
2068   vpx_free(second_pred);
2071 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2072 BLOCK_SIZE_TYPE bsize,
2073 int64_t txfm_cache[],
2074 int *rate2, int *distortion, int *skippable,
2076 int *rate_y, int *distortion_y,
2077 int *rate_uv, int *distortion_uv,
2078 int *mode_excluded, int *disable_skip,
2079 INTERPOLATIONFILTERTYPE *best_filter,
2081 YV12_BUFFER_CONFIG **scaled_ref_frame,
2082 int mi_row, int mi_col,
2083 int_mv single_newmv[MAX_REF_FRAMES]) {
2084 const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
2086 VP9_COMMON *cm = &cpi->common;
2087 MACROBLOCKD *xd = &x->e_mbd;
2088 const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
2089 const enum BlockSize uv_block_size = get_plane_block_size(bsize,
2091 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2092 const int is_comp_pred = (mbmi->second_ref_frame > 0);
2093 const int num_refs = is_comp_pred ? 2 : 1;
2094 const int this_mode = mbmi->mode;
2096 int refs[2] = { mbmi->ref_frame,
2097 (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };
2100 int64_t this_rd = 0;
2101 unsigned char tmp_buf[MAX_MB_PLANE][64 * 64];
2102 int pred_exists = 0;
2103 int interpolating_intpel_seen = 0;
2105 int64_t rd, best_rd = INT64_MAX;
2107 switch (this_mode) {
2109 ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
2110 ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
2113 // Initialize mv using single prediction mode result.
2114 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2115 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2117 if (cpi->sf.comp_inter_joint_search_thresh < bsize)
2118 iterative_motion_search(cpi, x, bsize, frame_mv, scaled_ref_frame,
2119 mi_row, mi_col, single_newmv);
2121 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2122 frame_mv[refs[1]].as_int == INVALID_MV)
2124 *rate2 += vp9_mv_bit_cost(&frame_mv[refs[0]],
2126 x->nmvjointcost, x->mvcost, 96,
2127 x->e_mbd.allow_high_precision_mv);
2128 *rate2 += vp9_mv_bit_cost(&frame_mv[refs[1]],
2130 x->nmvjointcost, x->mvcost, 96,
2131 x->e_mbd.allow_high_precision_mv);
2133 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2134 int bestsme = INT_MAX;
2135 int further_steps, step_param = cpi->sf.first_step;
2136 int sadpb = x->sadperbit16;
2137 int_mv mvp_full, tmp_mv;
2140 int tmp_col_min = x->mv_col_min;
2141 int tmp_col_max = x->mv_col_max;
2142 int tmp_row_min = x->mv_row_min;
2143 int tmp_row_max = x->mv_row_max;
2145 if (scaled_ref_frame[0]) {
2148 // Swap out the reference frame for a version that's been scaled to
2149 // match the resolution of the current frame, allowing the existing
2150 // motion search code to be used without additional modifications.
2151 for (i = 0; i < MAX_MB_PLANE; i++)
2152 backup_yv12[i] = xd->plane[i].pre[0];
2154 setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
2158 vp9_clamp_mv_min_max(x, &ref_mv[0]);
2160 sr = vp9_init_search_range(cpi->common.width, cpi->common.height);
2162 // mvp_full.as_int = ref_mv[0].as_int;
2164 mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int;
2166 mvp_full.as_mv.col >>= 3;
2167 mvp_full.as_mv.row >>= 3;
2169 // adjust search range according to sr from mv prediction
2170 step_param = MAX(step_param, sr);
2172 // Further step/diamond searches as necessary
2173 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2175 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2176 sadpb, further_steps, 1,
2177 &cpi->fn_ptr[block_size],
2178 &ref_mv[0], &tmp_mv);
2180 x->mv_col_min = tmp_col_min;
2181 x->mv_col_max = tmp_col_max;
2182 x->mv_row_min = tmp_row_min;
2183 x->mv_row_max = tmp_row_max;
2185 if (bestsme < INT_MAX) {
2186 int dis; /* TODO: use dis in distortion calculation later. */
2188 cpi->find_fractional_mv_step(x, &tmp_mv,
2191 &cpi->fn_ptr[block_size],
2192 x->nmvjointcost, x->mvcost,
2195 frame_mv[refs[0]].as_int = tmp_mv.as_int;
2196 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2198 // Add the new motion vector cost to our rolling cost variable
2199 *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
2200 x->nmvjointcost, x->mvcost,
2201 96, xd->allow_high_precision_mv);
2203 // restore the predictor, if required
2204 if (scaled_ref_frame[0]) {
2207 for (i = 0; i < MAX_MB_PLANE; i++)
2208 xd->plane[i].pre[0] = backup_yv12[i];
2218 for (i = 0; i < num_refs; ++i) {
2219 cur_mv[i] = frame_mv[refs[i]];
2220 // Clip "next_nearest" so that it does not extend too far out of image
2221 if (this_mode == NEWMV)
2222 assert(!clamp_mv2(&cur_mv[i], xd));
2224 clamp_mv2(&cur_mv[i], xd);
2226 if (mv_check_bounds(x, &cur_mv[i]))
2228 mbmi->mv[i].as_int = cur_mv[i].as_int;
2232 /* We don't include the cost of the second reference here, because there
2233 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2234 * words if you present them in that order, the second one is always known
2235 * if the first is known */
2236 *compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP),
2238 *rate2 += vp9_cost_mv_ref(cpi, this_mode,
2239 mbmi->mb_mode_context[mbmi->ref_frame]);
2242 interpolating_intpel_seen = 0;
2243 // Are all MVs integer pel for Y and UV
2244 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2245 (mbmi->mv[0].as_mv.col & 15) == 0;
2247 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2248 (mbmi->mv[1].as_mv.col & 15) == 0;
2249 // Search for best switchable filter by checking the variance of
2250 // pred error irrespective of whether the filter will be used
2251 if (cpi->speed > 4) {
2252 *best_filter = EIGHTTAP;
2255 int tmp_rate_sum = 0, tmp_dist_sum = 0;
2256 for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
2258 const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
2259 const int is_intpel_interp = intpel_mv &&
2260 vp9_is_interpolating_filter[filter];
2261 mbmi->interp_filter = filter;
2262 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2264 if (cm->mcomp_filter_type == SWITCHABLE)
2265 rs = get_switchable_rate(cm, x);
2267 if (interpolating_intpel_seen && is_intpel_interp) {
2268 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
2270 int rate_sum = 0, dist_sum = 0;
2271 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2272 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2273 rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
2274 if (!interpolating_intpel_seen && is_intpel_interp) {
2275 tmp_rate_sum = rate_sum;
2276 tmp_dist_sum = dist_sum;
2279 newbest = i == 0 || rd < best_rd;
2283 *best_filter = mbmi->interp_filter;
2286 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2287 (cm->mcomp_filter_type != SWITCHABLE &&
2288 cm->mcomp_filter_type == mbmi->interp_filter)) {
2291 for (p = 0; p < MAX_MB_PLANE; p++) {
2292 const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
2293 const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
2296 for (i = 0; i < y; i++)
2297 vpx_memcpy(&tmp_buf[p][64 * i],
2298 xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x);
2302 interpolating_intpel_seen |= is_intpel_interp;
2306 // Set the appropriate filter
2307 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2308 cm->mcomp_filter_type : *best_filter;
2309 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2315 for (p = 0; p < MAX_MB_PLANE; p++) {
2316 const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
2317 const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
2320 for (i = 0; i < y; i++)
2321 vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
2322 &tmp_buf[p][64 * i], x);
2325 // Handles the special case when a filter that is not in the
2326 // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2327 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2330 if (cpi->common.mcomp_filter_type == SWITCHABLE)
2331 *rate2 += get_switchable_rate(cm, x);
2333 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2335 else if (x->encode_breakout) {
2336 unsigned int var, sse;
2337 int threshold = (xd->plane[0].dequant[1]
2338 * xd->plane[0].dequant[1] >> 4);
2340 if (threshold < x->encode_breakout)
2341 threshold = x->encode_breakout;
2343 var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
2344 x->plane[0].src.stride,
2345 xd->plane[0].dst.buf,
2346 xd->plane[0].dst.stride,
2349 if ((int)sse < threshold) {
2350 unsigned int q2dc = xd->plane[0].dequant[0];
2351 /* If there is no codeable 2nd order dc
2352 or a very small uniform pixel change */
2353 if ((sse - var < q2dc * q2dc >> 4) ||
2354 (sse / 2 > var && sse - var < 64)) {
2355 // Check u and v to make sure skip is ok
2357 unsigned int sse2u, sse2v;
2358 var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
2359 x->plane[1].src.stride,
2360 xd->plane[1].dst.buf,
2361 xd->plane[1].dst.stride, &sse2u);
2362 var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
2363 x->plane[1].src.stride,
2364 xd->plane[2].dst.buf,
2365 xd->plane[1].dst.stride, &sse2v);
2366 sse2 = sse2u + sse2v;
2368 if (sse2 * 2 < threshold) {
2370 *distortion = sse + sse2;
2373 /* for best_yrd calculation */
2375 *distortion_uv = sse2;
2378 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2385 int skippable_y, skippable_uv;
2387 // Y cost and distortion
2388 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
2392 *distortion += *distortion_y;
2394 super_block_uvrd(cm, x, rate_uv, distortion_uv,
2395 &skippable_uv, bsize);
2398 *distortion += *distortion_uv;
2399 *skippable = skippable_y && skippable_uv;
2402 if (!(*mode_excluded)) {
2404 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2406 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2410 return this_rd; // if 0, this will be re-calculated by caller
// Picks the best intra coding choice for one superblock in rate-distortion
// terms. Three candidates are compared:
//   1. the best full-size luma (Y) intra mode plus the best chroma (UV)
//      intra mode, with both Y and UV fully skippable (no coefficients);
//   2. for blocks smaller than 8x8 only, a 4x4 sub-block luma mode search;
//   3. the same Y+UV modes as (1) but coded with coefficients.
// The winning rate and distortion are written to *returnrate/*returndist,
// the chosen mode/transform size is written back into the macroblockd, and
// per-transform-mode RD differences are stored in ctx->txfm_rd_diff.
//
// NOTE(review): this extract is a numbered listing with interior lines
// elided (gaps in the embedded numbering, e.g. 2424->2426, 2428-2429,
// 2439, 2441, 2445-2446, 2460, 2469-2470), so some declarations (e.g. of
// `i` and `txfm_size`), call arguments, an `else` line and closing braces
// are not visible here; comments below describe only the visible code.
2413 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2414 int *returnrate, int *returndist,
2415 BLOCK_SIZE_TYPE bsize,
2416 PICK_MODE_CONTEXT *ctx) {
2417 VP9_COMMON *cm = &cpi->common;
2418 MACROBLOCKD *xd = &x->e_mbd;
2419 int rate_y = 0, rate_uv;
2420 int rate_y_tokenonly = 0, rate_uv_tokenonly;
2421 int dist_y = 0, dist_uv;
2422 int y_skip = 0, uv_skip;
2423 int64_t txfm_cache[NB_TXFM_MODES], err;
2424 MB_PREDICTION_MODE mode;
2426 int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y;
// INT64_MAX sentinel: the 4x4 path only competes when bsize < 8x8 fills it in.
2427 int64_t err4x4 = INT64_MAX;
2430 vpx_memset(&txfm_cache,0,sizeof(txfm_cache));
// Start the search from DC_PRED on an intra frame reference.
2432 xd->mode_info_context->mbmi.mode = DC_PRED;
2433 xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
// Best full-size luma intra mode; fills txfm_cache with per-tx-mode RD costs.
2434 err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2435 &dist_y, &y_skip, bsize, txfm_cache);
// Save the winning Y mode/tx size before the UV search can overwrite mbmi.
2436 mode = xd->mode_info_context->mbmi.mode;
2437 txfm_size = xd->mode_info_context->mbmi.txfm_size;
// Chroma search runs at a minimum of 8x8 granularity.
2438 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
2440 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
// Sub-8x8 blocks additionally try per-4x4 luma modes.
2442 if (bsize < BLOCK_SIZE_SB8X8)
2443 err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
2444 &rate4x4_y_tokenonly,
2447 if (y_skip && uv_skip) {
// All coefficients skippable: drop the token costs and pay the skip flag.
2448 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2449 vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
// UV distortion is scaled down by 4 — presumably to weight chroma less;
// TODO(review): confirm against the matching inter-mode accounting.
2450 *returndist = dist_y + (dist_uv >> 2);
2451 memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
2452 xd->mode_info_context->mbmi.mode = mode;
2453 xd->mode_info_context->mbmi.txfm_size = txfm_size;
2454 } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) {
// 4x4 sub-block luma search won: code with 4x4 transforms, no-skip flag.
2455 *returnrate = rate4x4_y + rate_uv +
2456 vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
2457 *returndist = dist4x4_y + (dist_uv >> 2);
2458 vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
2459 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
// (else branch — its introducing line is elided in this listing)
// Full-size modes coded with coefficients; record RD deltas of each
// transform mode relative to the frame's current txfm_mode.
2461 *returnrate = rate_y + rate_uv +
2462 vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
2463 *returndist = dist_y + (dist_uv >> 2);
2464 for (i = 0; i < NB_TXFM_MODES; i++) {
2465 ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode];
2467 xd->mode_info_context->mbmi.txfm_size = txfm_size;
2468 xd->mode_info_context->mbmi.mode = mode;
// Snapshot the final mode_info into the pick-mode context for the caller.
2471 ctx->mic = *xd->mode_info_context;
2474 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2475 int mi_row, int mi_col,
2477 int *returndistortion,
2478 BLOCK_SIZE_TYPE bsize,
2479 PICK_MODE_CONTEXT *ctx) {
2480 VP9_COMMON *cm = &cpi->common;
2481 MACROBLOCKD *xd = &x->e_mbd;
2482 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2483 const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
2484 MB_PREDICTION_MODE this_mode;
2485 MB_PREDICTION_MODE best_mode = DC_PRED;
2486 MV_REFERENCE_FRAME ref_frame, second_ref = INTRA_FRAME;
2487 unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
2489 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2490 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
2491 int_mv single_newmv[MAX_REF_FRAMES];
2492 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
2494 int idx_list[4] = {0,
2498 int64_t best_rd = INT64_MAX;
2499 int64_t best_txfm_rd[NB_TXFM_MODES];
2500 int64_t best_txfm_diff[NB_TXFM_MODES];
2501 int64_t best_pred_diff[NB_PREDICTION_TYPES];
2502 int64_t best_pred_rd[NB_PREDICTION_TYPES];
2503 MB_MODE_INFO best_mbmode;
2505 int mode_index, best_mode_index = 0;
2506 unsigned int ref_costs[MAX_REF_FRAMES];
2507 int64_t best_overall_rd = INT64_MAX;
2508 INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
2509 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
2510 int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
2511 int dist_uv[TX_SIZE_MAX_SB], skip_uv[TX_SIZE_MAX_SB];
2512 MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
2513 struct scale_factors scale_factor[4];
2514 unsigned int ref_frame_mask = 0;
2515 unsigned int mode_mask = 0;
2516 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
2517 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
2518 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
2519 cpi->common.y_dc_delta_q);
2520 int_mv seg_mvs[4][MAX_REF_FRAMES];
2521 union b_mode_info best_bmodes[4];
2522 PARTITION_INFO best_partition;
2523 int bwsl = b_width_log2(bsize);
2524 int bws = (1 << bwsl) / 4; // mode_info step for subsize
2525 int bhsl = b_width_log2(bsize);
2526 int bhs = (1 << bhsl) / 4; // mode_info step for subsize
2528 for (i = 0; i < 4; i++) {
2531 for (j = 0; j < MAX_REF_FRAMES; j++)
2532 seg_mvs[i][j].as_int = INVALID_MV;
2534 // Everywhere the flag is set the error is much higher than its neighbors.
2535 ctx->frames_with_high_error = 0;
2536 ctx->modes_with_high_error = 0;
2538 xd->mode_info_context->mbmi.segment_id = segment_id;
2539 estimate_ref_frame_costs(cpi, segment_id, ref_costs);
2540 vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
2541 vpx_memset(&single_newmv, 0, sizeof(single_newmv));
2543 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
2544 best_pred_rd[i] = INT64_MAX;
2545 for (i = 0; i < NB_TXFM_MODES; i++)
2546 best_txfm_rd[i] = INT64_MAX;
2548 // Create a mask set to 1 for each frame used by a smaller resolution.
2549 if (cpi->speed > 0) {
2550 switch (block_size) {
2552 for (i = 0; i < 4; i++) {
2553 for (j = 0; j < 4; j++) {
2554 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
2555 mode_mask |= x->mb_context[i][j].modes_with_high_error;
2558 for (i = 0; i < 4; i++) {
2559 ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
2560 mode_mask |= x->sb32_context[i].modes_with_high_error;
2564 for (i = 0; i < 4; i++) {
2566 x->mb_context[xd->sb_index][i].frames_with_high_error;
2567 mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
2571 // Until we handle all block sizes set it to present;
2576 ref_frame_mask = ~ref_frame_mask;
2577 mode_mask = ~mode_mask;
2580 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
2581 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
2582 setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
2583 mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
2584 yv12_mb, scale_factor);
2586 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
2587 frame_mv[ZEROMV][ref_frame].as_int = 0;
2590 || (cpi->speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
2591 mbmi->mode = DC_PRED;
2592 mbmi->ref_frame = INTRA_FRAME;
2593 for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
2594 (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
2595 (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32)));
2597 mbmi->txfm_size = i;
2598 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i],
2599 &dist_uv[i], &skip_uv[i],
2600 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
2602 mode_uv[i] = mbmi->uv_mode;
2606 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
2607 int mode_excluded = 0;
2608 int64_t this_rd = INT64_MAX;
2609 int disable_skip = 0;
2611 int compmode_cost = 0;
2612 int rate2 = 0, rate_y = 0, rate_uv = 0;
2613 int distortion2 = 0, distortion_y = 0, distortion_uv = 0;
2615 int64_t txfm_cache[NB_TXFM_MODES];
2618 for (i = 0; i < NB_TXFM_MODES; ++i)
2619 txfm_cache[i] = INT64_MAX;
2621 // Test best rd so far against threshold for trying this mode.
2622 if (bsize >= BLOCK_SIZE_SB8X8 &&
2623 (best_rd < cpi->rd_threshes[mode_index] ||
2624 cpi->rd_threshes[mode_index] == INT_MAX))
2628 this_mode = vp9_mode_order[mode_index].mode;
2629 ref_frame = vp9_mode_order[mode_index].ref_frame;
2631 if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
2632 if (!(ref_frame_mask & (1 << ref_frame))) {
2635 if (!(mode_mask & (1 << this_mode))) {
2638 if (vp9_mode_order[mode_index].second_ref_frame != NONE
2640 & (1 << vp9_mode_order[mode_index].second_ref_frame))) {
2645 mbmi->ref_frame = ref_frame;
2646 mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
2648 if (!(ref_frame == INTRA_FRAME
2649 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
2652 if (!(mbmi->second_ref_frame == NONE
2653 || (cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))) {
2657 // TODO(jingning, jkoleszar): scaling reference frame not supported for
2659 if (mbmi->ref_frame > 0 &&
2660 (scale_factor[mbmi->ref_frame].x_num !=
2661 scale_factor[mbmi->ref_frame].x_den ||
2662 scale_factor[mbmi->ref_frame].y_num !=
2663 scale_factor[mbmi->ref_frame].y_den) &&
2664 this_mode == SPLITMV)
2667 if (mbmi->second_ref_frame > 0 &&
2668 (scale_factor[mbmi->second_ref_frame].x_num !=
2669 scale_factor[mbmi->second_ref_frame].x_den ||
2670 scale_factor[mbmi->second_ref_frame].y_num !=
2671 scale_factor[mbmi->second_ref_frame].y_den) &&
2672 this_mode == SPLITMV)
2675 set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
2677 comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
2678 mbmi->mode = this_mode;
2679 mbmi->uv_mode = DC_PRED;
2681 // Evaluate all sub-pel filters irrespective of whether we can use
2682 // them for this frame.
2683 mbmi->interp_filter = cm->mcomp_filter_type;
2684 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2686 if (bsize >= BLOCK_SIZE_SB8X8 &&
2687 (this_mode == I4X4_PRED || this_mode == SPLITMV))
2689 if (bsize < BLOCK_SIZE_SB8X8 &&
2690 !(this_mode == I4X4_PRED || this_mode == SPLITMV))
2694 if (ref_frame == ALTREF_FRAME) {
2695 second_ref = LAST_FRAME;
2697 second_ref = ref_frame + 1;
2699 if (!(cpi->ref_frame_flags & flag_list[second_ref]))
2701 mbmi->second_ref_frame = second_ref;
2702 set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
2707 mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
2709 // mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
2710 if (ref_frame != INTRA_FRAME) {
2711 if (mbmi->second_ref_frame != INTRA_FRAME)
2714 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
2718 // Select predictors
2719 for (i = 0; i < MAX_MB_PLANE; i++) {
2720 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
2722 xd->plane[i].pre[1] = yv12_mb[second_ref][i];
2725 // If the segment reference frame feature is enabled....
2726 // then do nothing if the current ref frame is not allowed..
2727 if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
2728 !vp9_check_segref(xd, segment_id, ref_frame)) {
2730 // If the segment skip feature is enabled....
2731 // then do nothing if the current mode is not allowed..
2732 } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
2733 (this_mode != ZEROMV)) {
2735 // Disable this drop out case if the ref frame
2736 // segment level feature is enabled for this segment. This is to
2737 // prevent the possibility that we end up unable to pick any mode.
2738 } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
2739 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
2740 // unless ARNR filtering is enabled in which case we want
2741 // an unfiltered alternative
2742 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
2743 if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) {
2748 // TODO(JBB): This is to make up for the fact that we don't have sad
2749 // functions that work when the block size reads outside the umv. We
2750 // should fix this either by making the motion search just work on
2751 // a representative block in the boundary ( first ) and then implement a
2752 // function that does sads when inside the border..
2753 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
2754 this_mode == NEWMV) {
2758 if (this_mode == I4X4_PRED) {
2761 mbmi->txfm_size = TX_4X4;
2762 rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
2763 &distortion_y, INT64_MAX);
2765 rate2 += intra_cost_penalty;
2766 distortion2 += distortion_y;
2768 rate2 += rate_uv_intra[TX_4X4];
2769 rate_uv = rate_uv_intra[TX_4X4];
2770 distortion2 += dist_uv[TX_4X4];
2771 distortion_uv = dist_uv[TX_4X4];
2772 mbmi->uv_mode = mode_uv[TX_4X4];
2773 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2774 for (i = 0; i < NB_TXFM_MODES; ++i)
2775 txfm_cache[i] = txfm_cache[ONLY_4X4];
2776 } else if (ref_frame == INTRA_FRAME) {
2778 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
2781 uv_tx = mbmi->txfm_size;
2782 if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8)
2784 if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16)
2786 else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32)
2789 rate_uv = rate_uv_intra[uv_tx];
2790 distortion_uv = dist_uv[uv_tx];
2791 skippable = skippable && skip_uv[uv_tx];
2792 mbmi->uv_mode = mode_uv[uv_tx];
2794 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv;
2795 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
2796 rate2 += intra_cost_penalty;
2797 distortion2 = distortion_y + distortion_uv;
2798 } else if (this_mode == SPLITMV) {
2799 const int is_comp_pred = mbmi->second_ref_frame > 0;
2800 int rate, distortion;
2801 int64_t this_rd_thresh;
2802 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
2803 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
2804 int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
2805 int switchable_filter_index;
2806 int_mv *second_ref = is_comp_pred ?
2807 &mbmi->ref_mvs[mbmi->second_ref_frame][0] : NULL;
2808 union b_mode_info tmp_best_bmodes[16];
2809 MB_MODE_INFO tmp_best_mbmode;
2810 PARTITION_INFO tmp_best_partition;
2811 int pred_exists = 0;
2814 this_rd_thresh = (mbmi->ref_frame == LAST_FRAME) ?
2815 cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
2816 this_rd_thresh = (mbmi->ref_frame == GOLDEN_FRAME) ?
2817 cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
2818 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
2820 for (switchable_filter_index = 0;
2821 switchable_filter_index < VP9_SWITCHABLE_FILTERS;
2822 ++switchable_filter_index) {
2824 mbmi->interp_filter =
2825 vp9_switchable_interp[switchable_filter_index];
2826 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2828 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
2829 &mbmi->ref_mvs[mbmi->ref_frame][0],
2830 second_ref, INT64_MAX,
2831 &rate, &rate_y, &distortion,
2833 (int)this_rd_thresh, seg_mvs,
2835 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
2836 const int rs = get_switchable_rate(cm, x);
2837 tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
2839 newbest = (tmp_rd < tmp_best_rd);
2841 tmp_best_filter = mbmi->interp_filter;
2842 tmp_best_rd = tmp_rd;
2844 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
2845 (mbmi->interp_filter == cm->mcomp_filter_type &&
2846 cm->mcomp_filter_type != SWITCHABLE)) {
2847 tmp_best_rdu = tmp_rd;
2848 tmp_best_rate = rate;
2849 tmp_best_ratey = rate_y;
2850 tmp_best_distortion = distortion;
2851 tmp_best_skippable = skippable;
2852 tmp_best_mbmode = *mbmi;
2853 tmp_best_partition = *x->partition_info;
2854 for (i = 0; i < 4; i++)
2855 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
2858 } // switchable_filter_index loop
2860 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
2861 tmp_best_filter : cm->mcomp_filter_type);
2862 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2864 // Handles the special case when a filter that is not in the
2865 // switchable list (bilinear, 6-tap) is indicated at the frame level
2866 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
2867 &mbmi->ref_mvs[mbmi->ref_frame][0],
2868 second_ref, INT64_MAX,
2869 &rate, &rate_y, &distortion,
2871 (int)this_rd_thresh, seg_mvs,
2874 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
2875 int rs = get_switchable_rate(cm, x);
2876 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
2878 tmp_rd = tmp_best_rdu;
2879 rate = tmp_best_rate;
2880 rate_y = tmp_best_ratey;
2881 distortion = tmp_best_distortion;
2882 skippable = tmp_best_skippable;
2883 *mbmi = tmp_best_mbmode;
2884 *x->partition_info = tmp_best_partition;
2885 for (i = 0; i < 4; i++)
2886 xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
2890 distortion2 += distortion;
2892 if (cpi->common.mcomp_filter_type == SWITCHABLE)
2893 rate2 += get_switchable_rate(cm, x);
2895 // If even the 'Y' rd value of split is higher than best so far
2896 // then dont bother looking at UV
2897 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
2899 vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
2900 super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
2901 &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4);
2903 distortion2 += distortion_uv;
2904 skippable = skippable && uv_skippable;
2906 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2907 for (i = 0; i < NB_TXFM_MODES; ++i)
2908 txfm_cache[i] = txfm_cache[ONLY_4X4];
2910 if (!mode_excluded) {
2912 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
2914 mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
2918 vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
2920 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
2921 int fb = get_ref_frame_idx(cpi, mbmi->ref_frame);
2922 if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
2923 scaled_ref_frame[0] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
2926 fb = get_ref_frame_idx(cpi, mbmi->second_ref_frame);
2927 if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
2928 scaled_ref_frame[1] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
2931 this_rd = handle_inter_mode(cpi, x, bsize,
2933 &rate2, &distortion2, &skippable,
2935 &rate_y, &distortion_y,
2936 &rate_uv, &distortion_uv,
2937 &mode_excluded, &disable_skip,
2938 &tmp_best_filter, frame_mv[this_mode],
2939 scaled_ref_frame, mi_row, mi_col,
2941 if (this_rd == INT64_MAX)
2945 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
2946 rate2 += compmode_cost;
2949 // Estimate the reference frame signaling cost and add it
2950 // to the rolling cost variable.
2951 rate2 += ref_costs[xd->mode_info_context->mbmi.ref_frame];
2953 if (!disable_skip) {
2954 // Test for the condition where skip block will be activated
2955 // because there are no non zero coefficients and make any
2956 // necessary adjustment for rate. Ignore if skip is coded at
2957 // segment level as the cost wont have been added in.
2958 int mb_skip_allowed;
2960 // Is Mb level skip allowed (i.e. not coded at segment level).
2961 mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
2963 if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
2964 // Back out the coefficient coding costs
2965 rate2 -= (rate_y + rate_uv);
2966 // for best_yrd calculation
2969 if (mb_skip_allowed) {
2972 // Cost the skip mb case
2973 vp9_prob skip_prob =
2974 vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
2977 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
2978 rate2 += prob_skip_cost;
2979 other_cost += prob_skip_cost;
2982 } else if (mb_skip_allowed) {
2983 // Add in the cost of the no skip flag.
2984 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
2986 rate2 += prob_skip_cost;
2987 other_cost += prob_skip_cost;
2990 // Calculate the final RD estimate for this mode.
2991 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2995 // Keep record of best intra distortion
2996 if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2997 (this_rd < best_intra_rd)) {
2998 best_intra_rd = this_rd;
2999 *returnintra = distortion2;
3003 if (!disable_skip && mbmi->ref_frame == INTRA_FRAME)
3004 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3005 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3007 if (this_rd < best_overall_rd) {
3008 best_overall_rd = this_rd;
3009 best_filter = tmp_best_filter;
3010 best_mode = this_mode;
3013 if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
3014 // Store the respective mode distortions for later use.
3015 if (mode_distortions[this_mode] == -1
3016 || distortion2 < mode_distortions[this_mode]) {
3017 mode_distortions[this_mode] = distortion2;
3019 if (frame_distortions[mbmi->ref_frame] == -1
3020 || distortion2 < frame_distortions[mbmi->ref_frame]) {
3021 frame_distortions[mbmi->ref_frame] = distortion2;
3025 // Did this mode help.. i.e. is it the new best mode
3026 if (this_rd < best_rd || x->skip) {
3027 if (!mode_excluded) {
3028 // Note index of best mode so far
3029 best_mode_index = mode_index;
3031 if (ref_frame == INTRA_FRAME) {
3032 /* required for left and above block mv */
3033 mbmi->mv[0].as_int = 0;
3036 other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame];
3037 *returnrate = rate2;
3038 *returndistortion = distortion2;
3040 best_mbmode = *mbmi;
3041 best_partition = *x->partition_info;
3043 if (this_mode == I4X4_PRED || this_mode == SPLITMV)
3044 for (i = 0; i < 4; i++)
3045 best_bmodes[i] = xd->mode_info_context->bmi[i];
3048 // Testing this mode gave rise to an improvement in best error score.
3049 // Lower threshold a bit for next time
3050 cpi->rd_thresh_mult[mode_index] =
3051 (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
3052 cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
3053 cpi->rd_threshes[mode_index] =
3054 (cpi->rd_baseline_thresh[mode_index] >> 7)
3055 * cpi->rd_thresh_mult[mode_index];
3058 // If the mode did not help improve the best error case then
3059 // raise the threshold for testing that mode next time around.
3061 cpi->rd_thresh_mult[mode_index] += 4;
3063 if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
3064 cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
3066 cpi->rd_threshes[mode_index] =
3067 (cpi->rd_baseline_thresh[mode_index] >> 7)
3068 * cpi->rd_thresh_mult[mode_index];
3072 /* keep record of best compound/single-only prediction */
3073 if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) {
3074 int single_rd, hybrid_rd, single_rate, hybrid_rate;
3076 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3077 single_rate = rate2 - compmode_cost;
3078 hybrid_rate = rate2;
3080 single_rate = rate2;
3081 hybrid_rate = rate2 + compmode_cost;
3084 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3085 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3087 if (mbmi->second_ref_frame <= INTRA_FRAME &&
3088 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
3089 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
3090 } else if (mbmi->second_ref_frame > INTRA_FRAME &&
3091 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
3092 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
3094 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
3095 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
3098 /* keep record of best txfm size */
3099 if (bsize < BLOCK_SIZE_SB32X32) {
3100 if (bsize < BLOCK_SIZE_MB16X16) {
3101 if (this_mode == SPLITMV || this_mode == I4X4_PRED)
3102 txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
3103 txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
3105 txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
3107 if (!mode_excluded && this_rd != INT64_MAX) {
3108 for (i = 0; i < NB_TXFM_MODES; i++) {
3109 int64_t adj_rd = INT64_MAX;
3110 if (this_mode != I4X4_PRED) {
3111 adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
3116 if (adj_rd < best_txfm_rd[i])
3117 best_txfm_rd[i] = adj_rd;
3121 if (x->skip && !mode_excluded)
3124 // Flag all modes that have a distortion thats > 2x the best we found at
3126 for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3127 if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3130 if (mode_distortions[mode_index] > 2 * *returndistortion) {
3131 ctx->modes_with_high_error |= (1 << mode_index);
3135 // Flag all ref frames that have a distortion thats > 2x the best we found at
3137 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3138 if (frame_distortions[ref_frame] > 2 * *returndistortion) {
3139 ctx->frames_with_high_error |= (1 << ref_frame);
3143 if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
3144 *returnrate = INT_MAX;
3145 *returndistortion = INT_MAX;
3149 assert((cm->mcomp_filter_type == SWITCHABLE) ||
3150 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
3151 (best_mbmode.ref_frame == INTRA_FRAME));
3153 // Accumulate filter usage stats
3154 // TODO(agrange): Use RD criteria to select interpolation filter mode.
3155 if (is_inter_mode(best_mode))
3156 ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];
3158 // TODO(rbultje) integrate with RD thresholding
3160 // Reduce the activation RD thresholds for the best choice mode
3161 if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
3162 (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
3163 int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
3165 cpi->rd_thresh_mult[best_mode_index] =
3166 (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
3167 cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
3168 cpi->rd_threshes[best_mode_index] =
3169 (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
3173 // This code forces Altref,0,0 and skip for the frame that overlays
3174 // an altref unless Altref is filtered. However, this is unsafe if
3175 // segment level coding of ref frame is enabled for this segment.
3176 if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
3177 cpi->is_src_frame_alt_ref &&
3178 (cpi->oxcf.arnr_max_frames == 0) &&
3179 (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)
3180 && bsize >= BLOCK_SIZE_SB8X8) {
3181 mbmi->mode = ZEROMV;
3182 mbmi->ref_frame = ALTREF_FRAME;
3183 mbmi->second_ref_frame = NONE;
3184 mbmi->mv[0].as_int = 0;
3185 mbmi->uv_mode = DC_PRED;
3186 mbmi->mb_skip_coeff = 1;
3187 if (cm->txfm_mode == TX_MODE_SELECT) {
3188 if (bsize >= BLOCK_SIZE_SB32X32)
3189 mbmi->txfm_size = TX_32X32;
3190 else if (bsize >= BLOCK_SIZE_MB16X16)
3191 mbmi->txfm_size = TX_16X16;
3193 mbmi->txfm_size = TX_8X8;
3196 vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
3197 vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
3202 *mbmi = best_mbmode;
3203 if (best_mbmode.ref_frame == INTRA_FRAME &&
3204 best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
3205 for (i = 0; i < 4; i++)
3206 xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
3209 if (best_mbmode.ref_frame != INTRA_FRAME &&
3210 best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
3211 for (i = 0; i < 4; i++)
3212 xd->mode_info_context->bmi[i].as_mv[0].as_int =
3213 best_bmodes[i].as_mv[0].as_int;
3215 if (mbmi->second_ref_frame > 0)
3216 for (i = 0; i < 4; i++)
3217 xd->mode_info_context->bmi[i].as_mv[1].as_int =
3218 best_bmodes[i].as_mv[1].as_int;
3220 *x->partition_info = best_partition;
3222 mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
3223 mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
3226 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
3227 if (best_pred_rd[i] == INT64_MAX)
3228 best_pred_diff[i] = INT_MIN;
3230 best_pred_diff[i] = best_rd - best_pred_rd[i];
3234 for (i = 0; i < NB_TXFM_MODES; i++) {
3235 if (best_txfm_rd[i] == INT64_MAX)
3236 best_txfm_diff[i] = 0;
3238 best_txfm_diff[i] = best_rd - best_txfm_rd[i];
3241 vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
3245 set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
3247 store_coding_context(x, ctx, best_mode_index,
3249 &mbmi->ref_mvs[mbmi->ref_frame][0],
3250 &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
3251 mbmi->second_ref_frame][0],
3252 best_pred_diff, best_txfm_diff);