2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
16 #include "vp9/common/vp9_pragmas.h"
17 #include "vp9/encoder/vp9_tokenize.h"
18 #include "vp9/encoder/vp9_treewriter.h"
19 #include "vp9/encoder/vp9_onyx_int.h"
20 #include "vp9/encoder/vp9_modecosts.h"
21 #include "vp9/encoder/vp9_encodeintra.h"
22 #include "vp9/common/vp9_entropymode.h"
23 #include "vp9/common/vp9_reconinter.h"
24 #include "vp9/common/vp9_reconintra.h"
25 #include "vp9/common/vp9_findnearmv.h"
26 #include "vp9/common/vp9_quant_common.h"
27 #include "vp9/encoder/vp9_encodemb.h"
28 #include "vp9/encoder/vp9_quantize.h"
29 #include "vp9/encoder/vp9_variance.h"
30 #include "vp9/encoder/vp9_mcomp.h"
31 #include "vp9/encoder/vp9_rdopt.h"
32 #include "vp9/encoder/vp9_ratectrl.h"
33 #include "vpx_mem/vpx_mem.h"
34 #include "vp9/common/vp9_systemdependent.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/common/vp9_seg_common.h"
37 #include "vp9/common/vp9_pred_common.h"
38 #include "vp9/common/vp9_entropy.h"
39 #include "./vp9_rtcd.h"
40 #include "vp9/common/vp9_mvref_common.h"
41 #include "vp9/common/vp9_common.h"
43 #define INVALID_MV 0x80008000
45 /* Factor to weigh the rate for switchable interp filters */
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
48 #define LAST_FRAME_MODE_MASK 0xFFEDCD60
49 #define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
50 #define ALT_REF_MODE_MASK 0xFFC648D0
52 #define MIN_EARLY_TERM_INDEX 3
54 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
55 {NEARESTMV, LAST_FRAME, NONE},
56 {NEARESTMV, ALTREF_FRAME, NONE},
57 {NEARESTMV, GOLDEN_FRAME, NONE},
59 {DC_PRED, INTRA_FRAME, NONE},
61 {NEWMV, LAST_FRAME, NONE},
62 {NEWMV, ALTREF_FRAME, NONE},
63 {NEWMV, GOLDEN_FRAME, NONE},
65 {NEARMV, LAST_FRAME, NONE},
66 {NEARMV, ALTREF_FRAME, NONE},
67 {NEARESTMV, LAST_FRAME, ALTREF_FRAME},
68 {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
70 {TM_PRED, INTRA_FRAME, NONE},
72 {NEARMV, LAST_FRAME, ALTREF_FRAME},
73 {NEWMV, LAST_FRAME, ALTREF_FRAME},
74 {NEARMV, GOLDEN_FRAME, NONE},
75 {NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
76 {NEWMV, GOLDEN_FRAME, ALTREF_FRAME},
78 {ZEROMV, LAST_FRAME, NONE},
79 {ZEROMV, GOLDEN_FRAME, NONE},
80 {ZEROMV, ALTREF_FRAME, NONE},
81 {ZEROMV, LAST_FRAME, ALTREF_FRAME},
82 {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
84 {H_PRED, INTRA_FRAME, NONE},
85 {V_PRED, INTRA_FRAME, NONE},
86 {D135_PRED, INTRA_FRAME, NONE},
87 {D207_PRED, INTRA_FRAME, NONE},
88 {D153_PRED, INTRA_FRAME, NONE},
89 {D63_PRED, INTRA_FRAME, NONE},
90 {D117_PRED, INTRA_FRAME, NONE},
91 {D45_PRED, INTRA_FRAME, NONE},
94 const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
98 {LAST_FRAME, ALTREF_FRAME},
99 {GOLDEN_FRAME, ALTREF_FRAME},
103 // The baseline rd thresholds for breaking out of the rd loop for
104 // certain modes are assumed to be based on 8x8 blocks.
105 // This table is used to correct for block size.
106 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
107 static int rd_thresh_block_size_factor[BLOCK_SIZES] =
108 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
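// Note: the << 2 scaling above is undone by the "/ 4" applied when these
// factors are used in set_block_thresholds() below.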
110 #define RD_THRESH_MAX_FACT 64
111 #define RD_THRESH_INC 1
112 #define RD_THRESH_POW 1.25
113 #define RD_MULT_EPB_RATIO 64
115 #define MV_COST_WEIGHT 108
116 #define MV_COST_WEIGHT_SUB 120
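// Assuming vp9_mv_bit_cost() scales its result by weight / 128, these weights
// cost motion vectors at roughly 0.84x and 0.94x of the nominal mv rate; the
// _SUB weight is the one used for sub-8x8 block mvs in labels2mode() below.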
118 static void fill_token_costs(vp9_coeff_cost *c,
119 vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
122 for (t = TX_4X4; t <= TX_32X32; t++)
123 for (i = 0; i < BLOCK_TYPES; i++)
124 for (j = 0; j < REF_TYPES; j++)
125 for (k = 0; k < COEF_BANDS; k++)
126 for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
127 vp9_prob probs[ENTROPY_NODES];
128 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
129 vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
131 vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
133 assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
134 c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
138 static const int rd_iifactor[32] = {
139 4, 4, 3, 2, 1, 0, 0, 0,
140 0, 0, 0, 0, 0, 0, 0, 0,
141 0, 0, 0, 0, 0, 0, 0, 0,
142 0, 0, 0, 0, 0, 0, 0, 0,
145 // 3* dc_qlookup[Q]*dc_qlookup[Q];
147 /* values are now correlated to quantizer */
148 static int sad_per_bit16lut[QINDEX_RANGE];
149 static int sad_per_bit4lut[QINDEX_RANGE];
151 void vp9_init_me_luts() {
154 // Initialize the sad lut tables using a formulaic calculation for now
155 // This is to make it easier to resolve the impact of experimental changes
156 // to the quantizer tables.
157 for (i = 0; i < QINDEX_RANGE; i++) {
158 sad_per_bit16lut[i] =
159 (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
160 sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
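// For example, at a qindex where vp9_convert_qindex_to_q() returns ~40 this
// gives sadperbit16 = (int)(0.0418 * 40 + 2.4107) = 4 and sadperbit4 = 5.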
164 int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
165 const int q = vp9_dc_quant(qindex, 0);
166 // TODO(debargha): Adjust the function below
167 int rdmult = 88 * q * q / 25;
168 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
169 if (cpi->twopass.next_iiratio > 31)
170 rdmult += (rdmult * rd_iifactor[31]) >> 4;
172 rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
177 static int compute_rd_thresh_factor(int qindex) {
179 // TODO(debargha): Adjust the function below
180 q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
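// e.g. a dc quantizer step of 40 gives q = (int)(pow(10.0, 1.25) * 5.12) = 91.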
186 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
187 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
188 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
191 static void set_block_thresholds(VP9_COMP *cpi) {
192 int i, bsize, segment_id;
193 VP9_COMMON *cm = &cpi->common;
195 for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
197 int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
198 segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
199 q = compute_rd_thresh_factor(segment_qindex);
201 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
202 // Thresholds here seem unnecessarily harsh but are fine given the actual
203 // range of values used for cpi->sf.thresh_mult[]
204 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
206 for (i = 0; i < MAX_MODES; ++i) {
207 if (cpi->sf.thresh_mult[i] < thresh_max) {
208 cpi->rd_threshes[segment_id][bsize][i] =
209 cpi->sf.thresh_mult[i] * q *
210 rd_thresh_block_size_factor[bsize] / 4;
212 cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
216 for (i = 0; i < MAX_REFS; ++i) {
217 if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
218 cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
219 cpi->sf.thresh_mult_sub8x8[i] * q *
220 rd_thresh_block_size_factor[bsize] / 4;
222 cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
229 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
230 VP9_COMMON *cm = &cpi->common;
233 vp9_clear_system_state(); // __asm emms;
235 // Further tests required to see if optimum is different
236 // for key frames, golden frames and arf frames.
237 // if (cpi->common.refresh_golden_frame ||
238 // cpi->common.refresh_alt_ref_frame)
239 qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);
241 cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
242 cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);
244 cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
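// Ensure errorperbit is at least 1 even when RDMULT is very small.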
245 cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
247 vp9_set_speed_features(cpi);
249 cpi->mb.select_txfm_size = cpi->sf.tx_size_search_method == USE_LARGESTALL ?
252 set_block_thresholds(cpi);
254 fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);
256 for (i = 0; i < PARTITION_CONTEXTS; i++)
257 vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
260 /* rough estimate for costing */
261 vp9_init_mode_costs(cpi);
263 if (!frame_is_intra_only(cm)) {
264 vp9_build_nmv_cost_table(
265 cpi->mb.nmvjointcost,
266 cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
268 cm->allow_high_precision_mv, 1, 1);
270 for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
271 MB_PREDICTION_MODE m;
273 for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
274 cpi->mb.inter_mode_cost[i][INTER_OFFSET(m)] =
275 cost_token(vp9_inter_mode_tree,
276 cm->fc.inter_mode_probs[i],
277 &vp9_inter_mode_encodings[INTER_OFFSET(m)]);
282 static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
283 const double *tab1, const double *tab2,
284 double *v1, double *v2) {
285 double y = x * inv_step;
288 *v1 = tab1[ntab - 1];
289 *v2 = tab2[ntab - 1];
292 *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
293 *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
297 static void model_rd_norm(double x, double *R, double *D) {
298 static const int inv_tab_step = 8;
299 static const int tab_size = 120;
300 // NOTE: The tables below must be of the same size
303 // This table models the rate for a Laplacian source
304 // with given variance when quantized with a uniform quantizer
305 // with given stepsize. The closed form expression is:
306 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
307 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
308 // and H(x) is the binary entropy function.
309 static const double rate_tab[] = {
310 64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
311 2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
312 1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
313 0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
314 0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
315 0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
316 0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
317 0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
318 0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
319 0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
320 0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
321 0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
322 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
323 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
324 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
326 // Normalized distortion
327 // This table models the normalized distortion for a Laplacian source
328 // with given variance when quantized with a uniform quantizer
329 // with given stepsize. The closed form expression is:
330 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
331 // where x = qpstep / sqrt(variance)
332 // Note the actual distortion is Dn * variance.
333 static const double dist_tab[] = {
334 0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
335 0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
336 0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
337 0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
338 0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
339 0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
340 0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
341 0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
342 0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
343 0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
344 0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
345 0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
346 0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
347 0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
348 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
351 assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
352 assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
353 assert(sizeof(rate_tab) == sizeof(dist_tab));
356 linear_interpolate2(x, tab_size, inv_tab_step,
357 rate_tab, dist_tab, R, D);
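// Spot check against the closed forms above: at x = 1.0 (table index 8, since
// entries step by 1 / inv_tab_step = 1/8), r = exp(-sqrt(2)) ~= 0.243, so
// Rn ~= H(0.493) + 0.493 * (1 + H(0.243) / 0.757) ~= 2.014 bits and
// Dn = 1 - (1 / sqrt(2)) / sinh(1 / sqrt(2)) ~= 0.079, matching rate_tab[8]
// and dist_tab[8].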
360 static void model_rd_from_var_lapndz(int var, int n, int qstep,
361 int *rate, int64_t *dist) {
362 // This function models the rate and distortion for a Laplacian
363 // source with given variance when quantized with a uniform quantizer
364 // with given stepsize. The closed form expressions are in:
365 // Hang and Chen, "Source Model for transform video coder and its
366 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
367 // Sys. for Video Tech., April 1997.
368 vp9_clear_system_state();
369 if (var == 0 || n == 0) {
374 double s2 = (double) var / n;
375 double x = qstep / sqrt(s2);
376 model_rd_norm(x, &R, &D);
377 *rate = (int)((n << 8) * R + 0.5);
378 *dist = (int)(var * D + 0.5);
380 vp9_clear_system_state();
383 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
384 MACROBLOCK *x, MACROBLOCKD *xd,
385 int *out_rate_sum, int64_t *out_dist_sum) {
386 // Note our transform coefficients are 8 times those of an orthogonal transform.
387 // Hence quantizer step is also 8 times. To get effective quantizer
388 // we need to divide by 8 before sending to modeling function.
389 int i, rate_sum = 0, dist_sum = 0;
391 for (i = 0; i < MAX_MB_PLANE; ++i) {
392 struct macroblock_plane *const p = &x->plane[i];
393 struct macroblockd_plane *const pd = &xd->plane[i];
394 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
398 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
399 pd->dst.buf, pd->dst.stride, &sse);
400 // sse works better than var, since there is no dc prediction used
401 model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
402 pd->dequant[1] >> 3, &rate, &dist);
405 dist_sum += (int)dist;
408 *out_rate_sum = rate_sum;
409 *out_dist_sum = dist_sum << 4;
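// The << 4 converts the pixel-domain SSE estimate into the distortion units
// used by dist_block(): per the note above the coefficients carry an extra
// gain of 8, so squared errors there end up roughly 16x larger (after >> 2)
// than in the pixel domain.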
412 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
414 MACROBLOCK *x, MACROBLOCKD *xd,
415 int *out_rate_sum, int64_t *out_dist_sum,
419 struct macroblock_plane *const p = &x->plane[0];
420 struct macroblockd_plane *const pd = &xd->plane[0];
421 const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
422 const int height = 4 << num_4x4_blocks_high_lookup[bsize];
424 int64_t dist_sum = 0;
425 const int t = 4 << tx_size;
427 if (tx_size == TX_4X4) {
429 } else if (tx_size == TX_8X8) {
431 } else if (tx_size == TX_16X16) {
433 } else if (tx_size == TX_32X32) {
440 for (j = 0; j < height; j += t) {
441 for (k = 0; k < width; k += t) {
445 cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
446 &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
448 // sse works better than var, since there is no dc prediction used
449 model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
452 *out_skip &= (rate < 1024);
456 *out_rate_sum = rate_sum;
457 *out_dist_sum = dist_sum << 4;
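// Returns the sum of squared differences between the transform coefficients
// and their dequantized values; *ssz receives the sum of squared source
// coefficients, which dist_block() below uses as the skip (all-zero)
// distortion.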
460 int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
461 intptr_t block_size, int64_t *ssz) {
463 int64_t error = 0, sqcoeff = 0;
465 for (i = 0; i < block_size; i++) {
466 int this_diff = coeff[i] - dqcoeff[i];
467 error += (unsigned)this_diff * this_diff;
468 sqcoeff += (unsigned) coeff[i] * coeff[i];
475 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
476 * decide whether to include cost of a trailing EOB node or not (i.e. we
477 * can skip this if the last coefficient in this transform block, e.g. the
478 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
480 static const int16_t band_counts[TX_SIZES][8] = {
481 { 1, 2, 3, 4, 3, 16 - 13, 0 },
482 { 1, 2, 3, 4, 11, 64 - 21, 0 },
483 { 1, 2, 3, 4, 11, 256 - 21, 0 },
484 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
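// Each row lists the six coefficient-band sizes for the 4x4 .. 32x32
// transforms (rows sum to 16, 64, 256 and 1024 coefficients respectively),
// followed by the terminating 0 described above.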
487 static INLINE int cost_coeffs(MACROBLOCK *x,
488 int plane, int block,
489 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
491 const int16_t *scan, const int16_t *nb) {
492 MACROBLOCKD *const xd = &x->e_mbd;
493 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
494 struct macroblockd_plane *pd = &xd->plane[plane];
495 const PLANE_TYPE type = pd->plane_type;
496 const int16_t *band_count = &band_counts[tx_size][1];
497 const int eob = pd->eobs[block];
498 const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
499 const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
500 unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
501 x->token_costs[tx_size][type][ref];
502 const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
503 uint8_t *p_tok = x->token_cache;
504 int pt = combine_entropy_contexts(above_ec, left_ec);
507 // Check for consistency of tx_size with mode info
508 assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
509 : get_uv_tx_size(mbmi) == tx_size);
513 cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
516 int band_left = *band_count++;
519 int v = qcoeff_ptr[0];
520 int prev_t = vp9_dct_value_tokens_ptr[v].token;
521 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
522 p_tok[0] = vp9_pt_energy_class[prev_t];
526 for (c = 1; c < eob; c++) {
527 const int rc = scan[c];
531 t = vp9_dct_value_tokens_ptr[v].token;
532 pt = get_coef_context(nb, p_tok, c);
533 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
534 p_tok[rc] = vp9_pt_energy_class[t];
537 band_left = *band_count++;
544 pt = get_coef_context(nb, p_tok, c);
545 cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
549 // is eob first coefficient;
555 static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
556 const int ss_txfrm_size = tx_size << 1;
557 struct rdcost_block_args* args = arg;
558 MACROBLOCK* const x = args->x;
559 MACROBLOCKD* const xd = &x->e_mbd;
560 struct macroblock_plane *const p = &x->plane[plane];
561 struct macroblockd_plane *const pd = &xd->plane[plane];
563 int shift = args->tx_size == TX_32X32 ? 0 : 2;
564 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
565 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
566 args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
568 args->sse = this_sse >> shift;
570 if (x->skip_encode &&
571 xd->mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) {
572 // TODO(jingning): tune the model to better capture the distortion.
573 int64_t p = (pd->dequant[1] * pd->dequant[1] *
574 (1 << ss_txfrm_size)) >> (shift + 2);
575 args->dist += (p >> 4);
580 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
581 TX_SIZE tx_size, void *arg) {
582 struct rdcost_block_args* args = arg;
585 txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
587 args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
588 args->t_left + y_idx, args->tx_size,
589 args->scan, args->nb);
592 static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
593 TX_SIZE tx_size, void *arg) {
594 struct rdcost_block_args *args = arg;
595 MACROBLOCK *const x = args->x;
596 MACROBLOCKD *const xd = &x->e_mbd;
597 struct encode_b_args encode_args = {x, NULL};
598 int64_t rd1, rd2, rd;
603 if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
604 vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
606 vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
608 dist_block(plane, block, tx_size, args);
609 rate_block(plane, block, plane_bsize, tx_size, args);
610 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
611 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
613 // TODO(jingning): temporarily enabled only for luma component
615 if (!xd->lossless && plane == 0)
616 x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];
618 args->this_rate += args->rate;
619 args->this_dist += args->dist;
620 args->this_sse += args->sse;
623 if (args->this_rd > args->best_rd) {
629 void vp9_get_entropy_contexts(TX_SIZE tx_size,
630 ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
631 const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
632 int num_4x4_w, int num_4x4_h) {
636 vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
637 vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
640 for (i = 0; i < num_4x4_w; i += 2)
641 t_above[i] = !!*(const uint16_t *)&above[i];
642 for (i = 0; i < num_4x4_h; i += 2)
643 t_left[i] = !!*(const uint16_t *)&left[i];
646 for (i = 0; i < num_4x4_w; i += 4)
647 t_above[i] = !!*(const uint32_t *)&above[i];
648 for (i = 0; i < num_4x4_h; i += 4)
649 t_left[i] = !!*(const uint32_t *)&left[i];
652 for (i = 0; i < num_4x4_w; i += 8)
653 t_above[i] = !!*(const uint64_t *)&above[i];
654 for (i = 0; i < num_4x4_h; i += 8)
655 t_left[i] = !!*(const uint64_t *)&left[i];
658 assert(!"Invalid transform size.");
662 static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
663 const int num_4x4_w, const int num_4x4_h,
664 const int64_t ref_rdcost,
665 struct rdcost_block_args *arg) {
666 vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
668 arg->tx_size = tx_size;
671 arg->best_rd = ref_rdcost;
674 static void txfm_rd_in_plane(MACROBLOCK *x,
675 struct rdcost_block_args *rd_stack,
676 int *rate, int64_t *distortion,
677 int *skippable, int64_t *sse,
678 int64_t ref_best_rd, int plane,
679 BLOCK_SIZE bsize, TX_SIZE tx_size) {
680 MACROBLOCKD *const xd = &x->e_mbd;
681 struct macroblockd_plane *const pd = &xd->plane[plane];
682 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
683 const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
684 const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
686 init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
687 ref_best_rd, rd_stack);
689 xd->mi_8x8[0]->mbmi.tx_size = tx_size;
691 vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
692 pd->above_context, pd->left_context,
693 num_4x4_w, num_4x4_h);
695 get_scan(xd, tx_size, pd->plane_type, 0, &rd_stack->scan, &rd_stack->nb);
697 foreach_transformed_block_in_plane(xd, bsize, plane,
698 block_yrd_txfm, rd_stack);
699 if (rd_stack->skip) {
701 *distortion = INT64_MAX;
705 *distortion = rd_stack->this_dist;
706 *rate = rd_stack->this_rate;
707 *sse = rd_stack->this_sse;
708 *skippable = vp9_is_skippable_in_plane(xd, bsize, plane);
712 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
713 int *rate, int64_t *distortion,
714 int *skip, int64_t *sse,
717 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
718 VP9_COMMON *const cm = &cpi->common;
719 const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
720 MACROBLOCKD *const xd = &x->e_mbd;
721 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
723 mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
725 txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
726 &sse[mbmi->tx_size], ref_best_rd, 0, bs,
728 cpi->tx_stepdown_count[0]++;
731 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
732 int (*r)[2], int *rate,
733 int64_t *d, int64_t *distortion,
735 int64_t tx_cache[TX_MODES],
737 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
738 VP9_COMMON *const cm = &cpi->common;
739 MACROBLOCKD *const xd = &x->e_mbd;
740 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
741 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
742 int64_t rd[TX_SIZES][2];
746 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
748 for (n = TX_4X4; n <= max_tx_size; n++) {
750 if (r[n][0] == INT_MAX)
752 for (m = 0; m <= n - (n == max_tx_size); m++) {
754 r[n][1] += vp9_cost_zero(tx_probs[m]);
756 r[n][1] += vp9_cost_one(tx_probs[m]);
760 assert(skip_prob > 0);
761 s0 = vp9_cost_bit(skip_prob, 0);
762 s1 = vp9_cost_bit(skip_prob, 1);
764 for (n = TX_4X4; n <= max_tx_size; n++) {
765 if (d[n] == INT64_MAX) {
766 rd[n][0] = rd[n][1] = INT64_MAX;
770 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
772 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
773 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
777 if (max_tx_size == TX_32X32 &&
778 (cm->tx_mode == ALLOW_32X32 ||
779 (cm->tx_mode == TX_MODE_SELECT &&
780 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
781 rd[TX_32X32][1] < rd[TX_4X4][1]))) {
782 mbmi->tx_size = TX_32X32;
783 } else if (max_tx_size >= TX_16X16 &&
784 (cm->tx_mode == ALLOW_16X16 ||
785 cm->tx_mode == ALLOW_32X32 ||
786 (cm->tx_mode == TX_MODE_SELECT &&
787 rd[TX_16X16][1] < rd[TX_8X8][1] &&
788 rd[TX_16X16][1] < rd[TX_4X4][1]))) {
789 mbmi->tx_size = TX_16X16;
790 } else if (cm->tx_mode == ALLOW_8X8 ||
791 cm->tx_mode == ALLOW_16X16 ||
792 cm->tx_mode == ALLOW_32X32 ||
793 (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
794 mbmi->tx_size = TX_8X8;
796 mbmi->tx_size = TX_4X4;
799 *distortion = d[mbmi->tx_size];
800 *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
801 *skip = s[mbmi->tx_size];
803 tx_cache[ONLY_4X4] = rd[TX_4X4][0];
804 tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
805 tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
806 tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
807 if (max_tx_size == TX_32X32 &&
808 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
809 rd[TX_32X32][1] < rd[TX_4X4][1])
810 tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
811 else if (max_tx_size >= TX_16X16 &&
812 rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
813 tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
815 tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
816 rd[TX_4X4][1] : rd[TX_8X8][1];
818 if (max_tx_size == TX_32X32 &&
819 rd[TX_32X32][1] < rd[TX_16X16][1] &&
820 rd[TX_32X32][1] < rd[TX_8X8][1] &&
821 rd[TX_32X32][1] < rd[TX_4X4][1]) {
822 cpi->tx_stepdown_count[0]++;
823 } else if (max_tx_size >= TX_16X16 &&
824 rd[TX_16X16][1] < rd[TX_8X8][1] &&
825 rd[TX_16X16][1] < rd[TX_4X4][1]) {
826 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
827 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
828 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
830 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
834 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
835 int (*r)[2], int *rate,
836 int64_t *d, int64_t *distortion,
837 int *s, int *skip, int64_t *sse,
840 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
841 VP9_COMMON *const cm = &cpi->common;
842 MACROBLOCKD *const xd = &x->e_mbd;
843 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
844 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
845 int64_t rd[TX_SIZES][2];
848 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
849 // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
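// Multiplying the model rd of the smaller transform sizes by factors > 1
// biases the selection below towards larger transforms unless they are
// clearly worse.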
851 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
853 // for (n = TX_4X4; n <= max_txfm_size; n++)
854 // r[n][0] = (r[n][0] * scale_r[n]);
856 for (n = TX_4X4; n <= max_tx_size; n++) {
858 for (m = 0; m <= n - (n == max_tx_size); m++) {
860 r[n][1] += vp9_cost_zero(tx_probs[m]);
862 r[n][1] += vp9_cost_one(tx_probs[m]);
866 assert(skip_prob > 0);
867 s0 = vp9_cost_bit(skip_prob, 0);
868 s1 = vp9_cost_bit(skip_prob, 1);
870 for (n = TX_4X4; n <= max_tx_size; n++) {
872 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
874 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
875 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
878 for (n = TX_4X4; n <= max_tx_size; n++) {
879 rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
880 rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
883 if (max_tx_size == TX_32X32 &&
884 (cm->tx_mode == ALLOW_32X32 ||
885 (cm->tx_mode == TX_MODE_SELECT &&
886 rd[TX_32X32][1] <= rd[TX_16X16][1] &&
887 rd[TX_32X32][1] <= rd[TX_8X8][1] &&
888 rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
889 mbmi->tx_size = TX_32X32;
890 } else if (max_tx_size >= TX_16X16 &&
891 (cm->tx_mode == ALLOW_16X16 ||
892 cm->tx_mode == ALLOW_32X32 ||
893 (cm->tx_mode == TX_MODE_SELECT &&
894 rd[TX_16X16][1] <= rd[TX_8X8][1] &&
895 rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
896 mbmi->tx_size = TX_16X16;
897 } else if (cm->tx_mode == ALLOW_8X8 ||
898 cm->tx_mode == ALLOW_16X16 ||
899 cm->tx_mode == ALLOW_32X32 ||
900 (cm->tx_mode == TX_MODE_SELECT &&
901 rd[TX_8X8][1] <= rd[TX_4X4][1])) {
902 mbmi->tx_size = TX_8X8;
904 mbmi->tx_size = TX_4X4;
907 // Actually encode using the chosen mode if a model was used, but do not
908 // update the r, d costs
909 txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
910 &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
912 if (max_tx_size == TX_32X32 &&
913 rd[TX_32X32][1] <= rd[TX_16X16][1] &&
914 rd[TX_32X32][1] <= rd[TX_8X8][1] &&
915 rd[TX_32X32][1] <= rd[TX_4X4][1]) {
916 cpi->tx_stepdown_count[0]++;
917 } else if (max_tx_size >= TX_16X16 &&
918 rd[TX_16X16][1] <= rd[TX_8X8][1] &&
919 rd[TX_16X16][1] <= rd[TX_4X4][1]) {
920 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
921 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
922 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
924 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
928 static void super_block_yrd(VP9_COMP *cpi,
929 MACROBLOCK *x, int *rate, int64_t *distortion,
930 int *skip, int64_t *psse, BLOCK_SIZE bs,
931 int64_t txfm_cache[TX_MODES],
932 int64_t ref_best_rd) {
933 int r[TX_SIZES][2], s[TX_SIZES];
934 int64_t d[TX_SIZES], sse[TX_SIZES];
935 MACROBLOCKD *xd = &x->e_mbd;
936 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
937 struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
938 const int b_inter_mode = is_inter_block(mbmi);
940 assert(bs == mbmi->sb_type);
942 vp9_subtract_sby(x, bs);
944 if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
945 (cpi->sf.tx_size_search_method != USE_FULL_RD &&
947 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
948 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
951 *psse = sse[mbmi->tx_size];
955 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
957 if (bs >= BLOCK_32X32)
958 model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
959 &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
960 if (bs >= BLOCK_16X16)
961 model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
962 &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
964 model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
965 &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
967 model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
968 &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
970 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
971 skip, sse, ref_best_rd, bs);
973 if (bs >= BLOCK_32X32)
974 txfm_rd_in_plane(x, rdcost_stack, &r[TX_32X32][0], &d[TX_32X32],
975 &s[TX_32X32], &sse[TX_32X32],
976 ref_best_rd, 0, bs, TX_32X32);
977 if (bs >= BLOCK_16X16)
978 txfm_rd_in_plane(x, rdcost_stack, &r[TX_16X16][0], &d[TX_16X16],
979 &s[TX_16X16], &sse[TX_16X16],
980 ref_best_rd, 0, bs, TX_16X16);
981 txfm_rd_in_plane(x, rdcost_stack, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
982 &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
983 txfm_rd_in_plane(x, rdcost_stack, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
984 &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
985 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
986 skip, txfm_cache, bs);
989 *psse = sse[mbmi->tx_size];
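// Returns nonzero when the given oblique intra mode can be skipped because
// the best mode found so far is not one of its neighbouring directional
// modes (used with the FLAG_SKIP_INTRA_DIRMISMATCH speed feature).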
992 static int conditional_skipintra(MB_PREDICTION_MODE mode,
993 MB_PREDICTION_MODE best_intra_mode) {
994 if (mode == D117_PRED &&
995 best_intra_mode != V_PRED &&
996 best_intra_mode != D135_PRED)
998 if (mode == D63_PRED &&
999 best_intra_mode != V_PRED &&
1000 best_intra_mode != D45_PRED)
1002 if (mode == D207_PRED &&
1003 best_intra_mode != H_PRED &&
1004 best_intra_mode != D45_PRED)
1006 if (mode == D153_PRED &&
1007 best_intra_mode != H_PRED &&
1008 best_intra_mode != D135_PRED)
1013 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
1014 MB_PREDICTION_MODE *best_mode,
1016 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1017 int *bestrate, int *bestratey,
1018 int64_t *bestdistortion,
1019 BLOCK_SIZE bsize, int64_t rd_thresh) {
1020 MB_PREDICTION_MODE mode;
1021 MACROBLOCKD *xd = &x->e_mbd;
1022 int64_t best_rd = rd_thresh;
1025 struct macroblock_plane *p = &x->plane[0];
1026 struct macroblockd_plane *pd = &xd->plane[0];
1027 const int src_stride = p->src.stride;
1028 const int dst_stride = pd->dst.stride;
1029 uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
1030 p->src.buf, src_stride);
1031 uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
1032 pd->dst.buf, dst_stride);
1033 int16_t *src_diff, *coeff;
1035 ENTROPY_CONTEXT ta[2], tempa[2];
1036 ENTROPY_CONTEXT tl[2], templ[2];
1038 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1039 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1041 uint8_t best_dst[8 * 8];
1045 vpx_memcpy(ta, a, sizeof(ta));
1046 vpx_memcpy(tl, l, sizeof(tl));
1047 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
1049 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1053 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1056 // Only do the oblique modes if the best so far is
1057 // one of the neighboring directional modes
1058 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1059 if (conditional_skipintra(mode, *best_mode))
1063 rate = bmode_costs[mode];
1066 vpx_memcpy(tempa, ta, sizeof(ta));
1067 vpx_memcpy(templ, tl, sizeof(tl));
1069 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1070 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1072 const int16_t *scan;
1074 uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
1075 uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
1076 const int block = ib + idy * 2 + idx;
1078 xd->mi_8x8[0]->bmi[block].as_mode = mode;
1079 src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1080 coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1081 vp9_predict_intra_block(xd, block, 1,
1083 x->skip_encode ? src : dst,
1084 x->skip_encode ? src_stride : dst_stride,
1086 vp9_subtract_block(4, 4, src_diff, 8,
1090 tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
1091 get_scan_nb_4x4(tx_type, &scan, &nb);
1093 if (tx_type != DCT_DCT)
1094 vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
1096 x->fwd_txm4x4(src_diff, coeff, 8);
1098 vp9_regular_quantize_b_4x4(x, 4, block, scan, get_iscan_4x4(tx_type));
1100 ratey += cost_coeffs(x, 0, block,
1101 tempa + idx, templ + idy, TX_4X4, scan, nb);
1102 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1104 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1107 if (tx_type != DCT_DCT)
1108 vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
1109 dst, pd->dst.stride, tx_type);
1111 xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
1117 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1119 if (this_rd < best_rd) {
1122 *bestdistortion = distortion;
1125 vpx_memcpy(a, tempa, sizeof(tempa));
1126 vpx_memcpy(l, templ, sizeof(templ));
1127 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1128 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1129 num_4x4_blocks_wide * 4);
1135 if (best_rd >= rd_thresh || x->skip_encode)
1138 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1139 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1140 num_4x4_blocks_wide * 4);
1145 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
1146 MACROBLOCK * const mb,
1149 int64_t * const distortion,
1152 MACROBLOCKD *const xd = &mb->e_mbd;
1153 MODE_INFO *const mic = xd->mi_8x8[0];
1154 const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
1155 const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
1156 const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
1157 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1158 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1161 int64_t total_distortion = 0;
1163 int64_t total_rd = 0;
1164 ENTROPY_CONTEXT t_above[4], t_left[4];
1167 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1168 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1170 bmode_costs = mb->mbmode_cost;
1172 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1173 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1174 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1175 MB_PREDICTION_MODE best_mode = DC_PRED;
1176 int r = INT_MAX, ry = INT_MAX;
1177 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1179 if (cpi->common.frame_type == KEY_FRAME) {
1180 const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, i);
1181 const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, i);
1183 bmode_costs = mb->y_mode_costs[A][L];
1186 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1187 t_above + idx, t_left + idy, &r, &ry, &d,
1188 bsize, best_rd - total_rd);
1189 if (this_rd >= best_rd - total_rd)
1192 total_rd += this_rd;
1194 total_distortion += d;
1197 mic->bmi[i].as_mode = best_mode;
1198 for (j = 1; j < num_4x4_blocks_high; ++j)
1199 mic->bmi[i + j * 2].as_mode = best_mode;
1200 for (j = 1; j < num_4x4_blocks_wide; ++j)
1201 mic->bmi[i + j].as_mode = best_mode;
1203 if (total_rd >= best_rd)
1209 *rate_y = tot_rate_y;
1210 *distortion = total_distortion;
1211 mic->mbmi.mode = mic->bmi[3].as_mode;
1213 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1216 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1217 int *rate, int *rate_tokenonly,
1218 int64_t *distortion, int *skippable,
1220 int64_t tx_cache[TX_MODES],
1222 MB_PREDICTION_MODE mode;
1223 MB_PREDICTION_MODE mode_selected = DC_PRED;
1224 MACROBLOCKD *const xd = &x->e_mbd;
1225 MODE_INFO *const mic = xd->mi_8x8[0];
1226 int this_rate, this_rate_tokenonly, s;
1227 int64_t this_distortion, this_rd;
1228 TX_SIZE best_tx = TX_4X4;
1230 int *bmode_costs = x->mbmode_cost;
1232 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1233 for (i = 0; i < TX_MODES; i++)
1234 tx_cache[i] = INT64_MAX;
1236 /* Y Search for intra prediction mode */
1237 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1238 int64_t local_tx_cache[TX_MODES];
1239 MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
1240 MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
1242 if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
1245 if (cpi->common.frame_type == KEY_FRAME) {
1246 const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, 0);
1247 const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, 0);
1249 bmode_costs = x->y_mode_costs[A][L];
1251 mic->mbmi.mode = mode;
1253 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1254 bsize, local_tx_cache, best_rd);
1256 if (this_rate_tokenonly == INT_MAX)
1259 this_rate = this_rate_tokenonly + bmode_costs[mode];
1260 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1262 if (this_rd < best_rd) {
1263 mode_selected = mode;
1265 best_tx = mic->mbmi.tx_size;
1267 *rate_tokenonly = this_rate_tokenonly;
1268 *distortion = this_distortion;
1272 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1273 for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1274 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1275 local_tx_cache[cpi->common.tx_mode];
1276 if (adj_rd < tx_cache[i]) {
1277 tx_cache[i] = adj_rd;
1283 mic->mbmi.mode = mode_selected;
1284 mic->mbmi.tx_size = best_tx;
1289 static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
1290 int *rate, int64_t *distortion, int *skippable,
1291 int64_t *sse, BLOCK_SIZE bsize,
1292 int64_t ref_best_rd) {
1293 MACROBLOCKD *const xd = &x->e_mbd;
1294 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
1295 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1297 int pnrate = 0, pnskip = 1;
1298 int64_t pndist = 0, pnsse = 0;
1300 if (ref_best_rd < 0)
1303 if (is_inter_block(mbmi))
1304 vp9_subtract_sbuv(x, bsize);
1311 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1312 txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse,
1313 ref_best_rd, plane, bsize, uv_txfm_size);
1314 if (pnrate == INT_MAX)
1317 *distortion += pndist;
1319 *skippable &= pnskip;
1325 *distortion = INT64_MAX;
1331 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1332 int *rate, int *rate_tokenonly,
1333 int64_t *distortion, int *skippable,
1335 MB_PREDICTION_MODE mode;
1336 MB_PREDICTION_MODE mode_selected = DC_PRED;
1337 int64_t best_rd = INT64_MAX, this_rd;
1338 int this_rate_tokenonly, this_rate, s;
1339 int64_t this_distortion, this_sse;
1341 // int mode_mask = (bsize <= BLOCK_8X8)
1342 // ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
1344 for (mode = DC_PRED; mode <= TM_PRED; mode ++) {
1345 // if (!(mode_mask & (1 << mode)))
1346 if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]]
1350 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
1352 super_block_uvrd(cpi, x, &this_rate_tokenonly,
1353 &this_distortion, &s, &this_sse, bsize, best_rd);
1354 if (this_rate_tokenonly == INT_MAX)
1356 this_rate = this_rate_tokenonly +
1357 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
1358 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1360 if (this_rd < best_rd) {
1361 mode_selected = mode;
1364 *rate_tokenonly = this_rate_tokenonly;
1365 *distortion = this_distortion;
1370 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
1375 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
1376 int *rate, int *rate_tokenonly,
1377 int64_t *distortion, int *skippable,
1382 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
1383 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
1384 skippable, &this_sse, bsize, INT64_MAX);
1385 *rate = *rate_tokenonly +
1386 x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
1387 this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1392 static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
1393 int *rate_uv, int *rate_uv_tokenonly,
1394 int64_t *dist_uv, int *skip_uv,
1395 MB_PREDICTION_MODE *mode_uv) {
1396 MACROBLOCK *const x = &cpi->mb;
1398 // Use an estimated rd for uv_intra based on DC_PRED if the
1399 // appropriate speed flag is set.
1400 if (cpi->sf.use_uv_intra_rd_estimate) {
1401 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1402 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1403 // Else do a proper rd search for each possible transform size that may
1404 // be considered in the main rd loop.
1406 rd_pick_intra_sbuv_mode(cpi, x,
1407 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1408 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1410 *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
1413 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
1415 MACROBLOCK *const x = &cpi->mb;
1416 MACROBLOCKD *const xd = &x->e_mbd;
1417 const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
1419 // Don't account for mode here if segment skip is enabled.
1420 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1421 assert(is_inter_mode(mode));
1422 return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1428 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
1429 x->e_mbd.mi_8x8[0]->mbmi.mode = mb;
1430 x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int;
1433 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1436 int mi_row, int mi_col,
1437 int_mv single_newmv[MAX_REF_FRAMES],
1440 static int labels2mode(MACROBLOCK *x, int i,
1441 MB_PREDICTION_MODE this_mode,
1442 int_mv *this_mv, int_mv *this_second_mv,
1443 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1444 int_mv seg_mvs[MAX_REF_FRAMES],
1445 int_mv *best_ref_mv,
1446 int_mv *second_best_ref_mv,
1447 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
1448 MACROBLOCKD *const xd = &x->e_mbd;
1449 MODE_INFO *const mic = xd->mi_8x8[0];
1450 MB_MODE_INFO *mbmi = &mic->mbmi;
1451 int cost = 0, thismvcost = 0;
1453 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1454 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1455 const int has_second_rf = has_second_ref(mbmi);
1457 /* We have to be careful retrieving previously-encoded motion vectors.
1458 Ones from this macroblock have to be pulled from the BLOCKD array
1459 as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1460 MB_PREDICTION_MODE m;
1462 // The only time we should do costing for a new motion vector or mode
1463 // is when we are on a new label (jbb May 08, 2007).
1464 switch (m = this_mode) {
1466 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1467 thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
1468 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1469 if (has_second_rf) {
1470 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1471 thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
1472 &second_best_ref_mv->as_mv,
1473 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1477 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1479 this_second_mv->as_int =
1480 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
1483 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1485 this_second_mv->as_int =
1486 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
1489 this_mv->as_int = 0;
1491 this_second_mv->as_int = 0;
1497 cost = cost_mv_ref(cpi, this_mode,
1498 mbmi->mode_context[mbmi->ref_frame[0]]);
1500 mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1502 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1504 mic->bmi[i].as_mode = m;
1506 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1507 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1508 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1509 &mic->bmi[i], sizeof(mic->bmi[i]));
1515 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1520 int64_t *distortion, int64_t *sse,
1521 ENTROPY_CONTEXT *ta,
1522 ENTROPY_CONTEXT *tl) {
1524 MACROBLOCKD *xd = &x->e_mbd;
1525 struct macroblockd_plane *const pd = &xd->plane[0];
1526 struct macroblock_plane *const p = &x->plane[0];
1527 MODE_INFO *const mi = xd->mi_8x8[0];
1528 const BLOCK_SIZE bsize = mi->mbmi.sb_type;
1529 const int width = plane_block_width(bsize, pd);
1530 const int height = plane_block_height(bsize, pd);
1533 uint8_t *const src = raster_block_offset_uint8(BLOCK_8X8, i,
1534 p->src.buf, p->src.stride);
1535 uint8_t *const dst = raster_block_offset_uint8(BLOCK_8X8, i,
1536 pd->dst.buf, pd->dst.stride);
1537 int64_t thisdistortion = 0, thissse = 0;
1538 int thisrate = 0, ref;
1539 const int is_compound = has_second_ref(&mi->mbmi);
1540 for (ref = 0; ref < 1 + is_compound; ++ref) {
1541 const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
1542 pd->pre[ref].buf, pd->pre[ref].stride);
1543 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1544 dst, pd->dst.stride,
1545 &mi->bmi[i].as_mv[ref].as_mv,
1546 &xd->scale_factor[ref],
1547 width, height, ref, &xd->subpix, MV_PRECISION_Q3);
1550 vp9_subtract_block(height, width,
1551 raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1553 dst, pd->dst.stride);
1556 for (idy = 0; idy < height / 4; ++idy) {
1557 for (idx = 0; idx < width / 4; ++idx) {
1558 int64_t ssz, rd, rd1, rd2;
1561 k += (idy * 2 + idx);
1562 coeff = BLOCK_OFFSET(p->coeff, k);
1563 x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1565 vp9_regular_quantize_b_4x4(x, 4, k, get_scan_4x4(DCT_DCT),
1566 get_iscan_4x4(DCT_DCT));
1567 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1570 thisrate += cost_coeffs(x, 0, k,
1572 tl + (k >> 1), TX_4X4,
1573 vp9_default_scan_4x4,
1574 vp9_default_scan_4x4_neighbors);
1575 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1576 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1583 *distortion = thisdistortion >> 2;
1584 *labelyrate = thisrate;
1585 *sse = thissse >> 2;
1587 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1598 ENTROPY_CONTEXT ta[2];
1599 ENTROPY_CONTEXT tl[2];
1603 int_mv *ref_mv, *second_ref_mv;
1611 MB_PREDICTION_MODE modes[4];
1612 SEG_RDSTAT rdstat[4][INTER_MODES];
1616 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1618 r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1619 r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1620 r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1621 r |= (mv->as_mv.col >> 3) > x->mv_col_max;
1625 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1626 MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
1627 struct macroblock_plane *const p = &x->plane[0];
1628 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1630 p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
1632 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1633 pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
1635 if (has_second_ref(mbmi))
1636 pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
1640 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1641 struct buf_2d orig_pre[2]) {
1642 MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
1643 x->plane[0].src = orig_src;
1644 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1645 if (has_second_ref(mbmi))
1646 x->e_mbd.plane[0].pre[1] = orig_pre[1];
1649 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1650 const TileInfo *const tile,
1651 BEST_SEG_INFO *bsi_buf, int filter_idx,
1652 int_mv seg_mvs[4][MAX_REF_FRAMES],
1653 int mi_row, int mi_col) {
1654 int i, br = 0, idx, idy;
1655 int64_t bd = 0, block_sse = 0;
1656 MB_PREDICTION_MODE this_mode;
1657 MODE_INFO *mi = x->e_mbd.mi_8x8[0];
1658 MB_MODE_INFO *const mbmi = &mi->mbmi;
1659 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1660 const int label_count = 4;
1661 int64_t this_segment_rd = 0;
1662 int label_mv_thresh;
1663 int segmentyrate = 0;
1664 const BLOCK_SIZE bsize = mbmi->sb_type;
1665 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1666 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1667 vp9_variance_fn_ptr_t *v_fn_ptr;
1668 ENTROPY_CONTEXT t_above[2], t_left[2];
1669 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1671 int subpelmv = 1, have_ref = 0;
1672 const int has_second_rf = has_second_ref(mbmi);
1674 vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1675 vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1677 v_fn_ptr = &cpi->fn_ptr[bsize];
1679 // 64 makes this threshold really big, effectively making it so that we
1680 // very rarely check mvs on segments. Setting this to 1 would make the mv
1681 // thresh roughly equal to what it is for macroblocks.
1683 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1685 // Segmentation method overheads
1686 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1687 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1688 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1689 // loop for 4x4/4x8/8x4 block coding, to be replaced with a new rd loop.
1690 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1691 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1692 MB_PREDICTION_MODE mode_selected = ZEROMV;
1693 int64_t best_rd = INT64_MAX;
1696 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
1697 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
1698 &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
1699 &frame_mv[NEARMV][mbmi->ref_frame[0]],
1700 i, 0, mi_row, mi_col);
1701 if (has_second_rf) {
1702 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
1703 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
1704 &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
1705 &frame_mv[NEARMV][mbmi->ref_frame[1]],
1706 i, 1, mi_row, mi_col);
1708 // search for the best motion vector on this segment
1709 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1710 const struct buf_2d orig_src = x->plane[0].src;
1711 struct buf_2d orig_pre[2];
1713 mode_idx = INTER_OFFSET(this_mode);
1714 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1716 // if we're near/nearest and mv == 0,0, compare to zeromv
1717 if ((this_mode == NEARMV || this_mode == NEARESTMV ||
1718 this_mode == ZEROMV) &&
1719 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
1721 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
1722 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
1723 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1724 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1725 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1727 if (this_mode == NEARMV) {
1730 } else if (this_mode == NEARESTMV) {
1734 assert(this_mode == ZEROMV);
1735 if (!has_second_rf) {
1737 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
1739 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
1743 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
1744 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
1746 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
1747 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
1753 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1754 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1755 sizeof(bsi->rdstat[i][mode_idx].ta));
1756 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1757 sizeof(bsi->rdstat[i][mode_idx].tl));
1759 // motion search for newmv (single predictor case only)
1760 if (!has_second_rf && this_mode == NEWMV &&
1761 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1764 int thissme, bestsme = INT_MAX;
1765 int sadpb = x->sadperbit4;
1769 /* Is the best so far sufficiently good that we can't justify doing
1770 * a new motion search? */
1771 if (best_rd < label_mv_thresh)
1774 if (cpi->compressor_speed) {
1775 // use previous block's result as next block's MV predictor.
1777 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1779 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1783 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1785 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1787 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
1788 // Take a weighted average of the step_params based on the last frame's
1789 // max mv magnitude and the best ref mvs of the current block for
1790 // the given reference.
1791 step_param = (vp9_init_search_range(cpi, max_mv) +
1792 cpi->mv_step_param) >> 1;
1794 step_param = cpi->mv_step_param;
1797 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1798 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1800 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) {
1801 mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1802 mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1803 step_param = MAX(step_param, 8);
1806 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1807 // adjust src pointer for this block
1809 if (cpi->sf.search_method == HEX) {
1810 bestsme = vp9_hex_search(x, &mvp_full.as_mv,
1812 sadpb, 1, v_fn_ptr, 1,
1813 &bsi->ref_mv->as_mv,
1814 &mode_mv[NEWMV].as_mv);
1815 } else if (cpi->sf.search_method == SQUARE) {
1816 bestsme = vp9_square_search(x, &mvp_full.as_mv,
1818 sadpb, 1, v_fn_ptr, 1,
1819 &bsi->ref_mv->as_mv,
1820 &mode_mv[NEWMV].as_mv);
1821 } else if (cpi->sf.search_method == BIGDIA) {
1822 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
1824 sadpb, 1, v_fn_ptr, 1,
1825 &bsi->ref_mv->as_mv,
1826 &mode_mv[NEWMV].as_mv);
1828 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1829 sadpb, further_steps, 0, v_fn_ptr,
1830 bsi->ref_mv, &mode_mv[NEWMV]);
1833 // Should we do a full search (best quality only)
1834 if (cpi->compressor_speed == 0) {
1835 /* Check if mvp_full is within the range. */
1836 clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
1837 x->mv_row_min, x->mv_row_max);
1839 thissme = cpi->full_search_sad(x, &mvp_full,
1840 sadpb, 16, v_fn_ptr,
1841 x->nmvjointcost, x->mvcost,
1844 if (thissme < bestsme) {
1846 mode_mv[NEWMV].as_int = mi->bmi[i].as_mv[0].as_int;
1848 /* The full search result is actually worse so re-instate the
1849 * previous best vector */
1850 mi->bmi[i].as_mv[0].as_int = mode_mv[NEWMV].as_int;
1854 if (bestsme < INT_MAX) {
1857 cpi->find_fractional_mv_step(x,
1858 &mode_mv[NEWMV].as_mv,
1859 &bsi->ref_mv->as_mv,
1860 cpi->common.allow_high_precision_mv,
1861 x->errorperbit, v_fn_ptr,
1862 0, cpi->sf.subpel_iters_per_step,
1863 x->nmvjointcost, x->mvcost,
1866 // save motion search result for use in compound prediction
1867 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1870 if (cpi->sf.adaptive_motion_search)
1871 x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1873 // restore src pointers
1874 mi_buf_restore(x, orig_src, orig_pre);
1877 if (has_second_rf) {
1878 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1879 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1883 if (has_second_rf && this_mode == NEWMV &&
1884 mbmi->interp_filter == EIGHTTAP) {
1885 // adjust src pointers
1887 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1889 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1890 mi_row, mi_col, seg_mvs[i],
1892 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1893 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1894 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1895 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1897 // restore src pointers
1898 mi_buf_restore(x, orig_src, orig_pre);
1901 bsi->rdstat[i][mode_idx].brate =
1902 labels2mode(x, i, this_mode, &mode_mv[this_mode],
1903 &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1904 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1908 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
1909 if (num_4x4_blocks_wide > 1)
1910 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
1911 mode_mv[this_mode].as_int;
1912 if (num_4x4_blocks_high > 1)
1913 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
1914 mode_mv[this_mode].as_int;
1915 if (has_second_rf) {
1916 bsi->rdstat[i][mode_idx].mvs[1].as_int =
1917 second_mode_mv[this_mode].as_int;
1918 if (num_4x4_blocks_wide > 1)
1919 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
1920 second_mode_mv[this_mode].as_int;
1921 if (num_4x4_blocks_high > 1)
1922 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
1923 second_mode_mv[this_mode].as_int;
1926 // Trap vectors that reach beyond the UMV borders
1927 if (mv_check_bounds(x, &mode_mv[this_mode]))
1929 if (has_second_rf &&
1930 mv_check_bounds(x, &second_mode_mv[this_mode]))
1933 if (filter_idx > 0) {
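// When the MV is integer-pel for both Y and UV and matches the one already
// tried with an earlier filter, the prediction is filter-independent, so the
// earlier filter's RD stats can be reused below.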
1934 BEST_SEG_INFO *ref_bsi = bsi_buf;
1935 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) ||
1936 (mode_mv[this_mode].as_mv.col & 0x0f);
1937 have_ref = mode_mv[this_mode].as_int ==
1938 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1939 if (has_second_rf) {
1940 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) ||
1941 (second_mode_mv[this_mode].as_mv.col & 0x0f);
1942 have_ref &= second_mode_mv[this_mode].as_int ==
1943 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1946 if (filter_idx > 1 && !subpelmv && !have_ref) {
1947 ref_bsi = bsi_buf + 1;
1948 have_ref = mode_mv[this_mode].as_int ==
1949 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1950 if (has_second_rf) {
1951 have_ref &= second_mode_mv[this_mode].as_int ==
1952 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1956 if (!subpelmv && have_ref &&
1957 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1958 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1959 sizeof(SEG_RDSTAT));
1960 if (num_4x4_blocks_wide > 1)
1961 bsi->rdstat[i + 1][mode_idx].eobs =
1962 ref_bsi->rdstat[i + 1][mode_idx].eobs;
1963 if (num_4x4_blocks_high > 1)
1964 bsi->rdstat[i + 2][mode_idx].eobs =
1965 ref_bsi->rdstat[i + 2][mode_idx].eobs;
1967 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1968 mode_selected = this_mode;
1969 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1975 bsi->rdstat[i][mode_idx].brdcost =
1976 encode_inter_mb_segment(cpi, x,
1977 bsi->segment_rd - this_segment_rd, i,
1978 &bsi->rdstat[i][mode_idx].byrate,
1979 &bsi->rdstat[i][mode_idx].bdist,
1980 &bsi->rdstat[i][mode_idx].bsse,
1981 bsi->rdstat[i][mode_idx].ta,
1982 bsi->rdstat[i][mode_idx].tl);
1983 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
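// RDCOST folds rate and distortion into a single Lagrangian cost
// (rate weighted by rdmult/rddiv, plus distortion).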
1984 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1985 bsi->rdstat[i][mode_idx].brate, 0);
1986 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1987 bsi->rdstat[i][mode_idx].eobs = pd->eobs[i];
1988 if (num_4x4_blocks_wide > 1)
1989 bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1];
1990 if (num_4x4_blocks_high > 1)
1991 bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2];
1994 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1995 mode_selected = this_mode;
1996 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1998 } /*for each 4x4 mode*/
2000 if (best_rd == INT64_MAX) {
2002 for (iy = i + 1; iy < 4; ++iy)
2003 for (midx = 0; midx < INTER_MODES; ++midx)
2004 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2005 bsi->segment_rd = INT64_MAX;
2009 mode_idx = INTER_OFFSET(mode_selected);
2010 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2011 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2013 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
2014 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
2015 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
2018 br += bsi->rdstat[i][mode_idx].brate;
2019 bd += bsi->rdstat[i][mode_idx].bdist;
2020 block_sse += bsi->rdstat[i][mode_idx].bsse;
2021 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2022 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2024 if (this_segment_rd > bsi->segment_rd) {
2026 for (iy = i + 1; iy < 4; ++iy)
2027 for (midx = 0; midx < INTER_MODES; ++midx)
2028 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2029 bsi->segment_rd = INT64_MAX;
2033 } /* for each label */
2037 bsi->segment_yrate = segmentyrate;
2038 bsi->segment_rd = this_segment_rd;
2039 bsi->sse = block_sse;
2041 // update the coding decisions
2042 for (i = 0; i < 4; ++i)
2043 bsi->modes[i] = mi->bmi[i].as_mode;
2046 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2047 const TileInfo *const tile,
2048 int_mv *best_ref_mv,
2049 int_mv *second_best_ref_mv,
2053 int64_t *returndistortion,
2054 int *skippable, int64_t *psse,
2056 int_mv seg_mvs[4][MAX_REF_FRAMES],
2057 BEST_SEG_INFO *bsi_buf,
2059 int mi_row, int mi_col) {
2061 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2062 MACROBLOCKD *xd = &x->e_mbd;
2063 MODE_INFO *mi = xd->mi_8x8[0];
2064 MB_MODE_INFO *mbmi = &mi->mbmi;
2069 bsi->segment_rd = best_rd;
2070 bsi->ref_mv = best_ref_mv;
2071 bsi->second_ref_mv = second_best_ref_mv;
2072 bsi->mvp.as_int = best_ref_mv->as_int;
2073 bsi->mvthresh = mvthresh;
2075 for (i = 0; i < 4; i++)
2076 bsi->modes[i] = ZEROMV;
2078 rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
2081 if (bsi->segment_rd > best_rd)
2083 /* set it to the best */
2084 for (i = 0; i < 4; i++) {
2085 mode_idx = INTER_OFFSET(bsi->modes[i]);
2086 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2087 if (has_second_ref(mbmi))
2088 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2089 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2090 mi->bmi[i].as_mode = bsi->modes[i];
2094 /* used to set mbmi->mv.as_int */
2096 *returntotrate = bsi->r;
2097 *returndistortion = bsi->d;
2098 *returnyrate = bsi->segment_yrate;
2099 *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
2101 mbmi->mode = bsi->modes[3];
2103 return bsi->segment_rd;
2106 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2107 uint8_t *ref_y_buffer, int ref_y_stride,
2108 int ref_frame, BLOCK_SIZE block_size ) {
2109 MACROBLOCKD *xd = &x->e_mbd;
2110 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2115 int best_sad = INT_MAX;
2116 int this_sad = INT_MAX;
2117 unsigned int max_mv = 0;
2119 uint8_t *src_y_ptr = x->plane[0].src.buf;
2121 int row_offset, col_offset;
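// When adaptive motion search is enabled, the stored pred_mv is considered as
// an extra candidate beyond the standard reference-MV list.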
2122 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2123 (cpi->sf.adaptive_motion_search &&
2124 cpi->common.show_frame &&
2125 block_size < cpi->sf.max_partition_size);
2127 // Get the sad for each candidate reference mv
2128 for (i = 0; i < num_mv_refs; i++) {
2129 this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ?
2130 mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int;
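// Track the largest candidate magnitude in full-pel units (>> 3 drops the
// 1/8-pel fraction); it is stored below as max_mv_context for this reference.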
2132 max_mv = MAX(max_mv,
2133 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2134 // The list is at an end if we see 0 for a second time.
2135 if (!this_mv.as_int && zero_seen)
2137 zero_seen = zero_seen || !this_mv.as_int;
2139 row_offset = this_mv.as_mv.row >> 3;
2140 col_offset = this_mv.as_mv.col >> 3;
2141 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2143 // Find sad for current vector.
2144 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2145 ref_y_ptr, ref_y_stride,
2148 // Note if it is the best so far.
2149 if (this_sad < best_sad) {
2150 best_sad = this_sad;
2155 // Note the index of the mv that worked best in the reference list.
2156 x->mv_best_ref_index[ref_frame] = best_index;
2157 x->max_mv_context[ref_frame] = max_mv;
2160 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2161 unsigned int *ref_costs_single,
2162 unsigned int *ref_costs_comp,
2163 vp9_prob *comp_mode_p) {
2164 VP9_COMMON *const cm = &cpi->common;
2165 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2166 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2168 if (seg_ref_active) {
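// The segment dictates the reference frame, so no reference-frame bits are
// coded and all reference costs are zero.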
2169 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2170 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2173 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
2174 vp9_prob comp_inter_p = 128;
2176 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
2177 comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
2178 *comp_mode_p = comp_inter_p;
2183 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2185 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
2186 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2187 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2188 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2190 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2191 base_cost += vp9_cost_bit(comp_inter_p, 0);
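// Single-reference tree: ref_single_p1 separates LAST from {GOLDEN, ALTREF};
// ref_single_p2 then separates GOLDEN from ALTREF.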
2193 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2194 ref_costs_single[ALTREF_FRAME] = base_cost;
2195 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2196 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2197 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2198 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2199 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2201 ref_costs_single[LAST_FRAME] = 512;
2202 ref_costs_single[GOLDEN_FRAME] = 512;
2203 ref_costs_single[ALTREF_FRAME] = 512;
2205 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
2206 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2207 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2209 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2210 base_cost += vp9_cost_bit(comp_inter_p, 1);
2212 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2213 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2215 ref_costs_comp[LAST_FRAME] = 512;
2216 ref_costs_comp[GOLDEN_FRAME] = 512;
2221 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2224 int_mv *second_ref_mv,
2225 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
2226 int64_t tx_size_diff[TX_MODES],
2227 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2228 MACROBLOCKD *const xd = &x->e_mbd;
2230 // Take a snapshot of the coding context so it can be
2231 // restored if we decide to encode this way
2232 ctx->skip = x->skip;
2233 ctx->best_mode_index = mode_index;
2234 ctx->mic = *xd->mi_8x8[0];
2236 ctx->best_ref_mv.as_int = ref_mv->as_int;
2237 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
2239 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
2240 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
2241 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
2243 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2244 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2245 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2248 static void setup_pred_block(const MACROBLOCKD *xd,
2249 struct buf_2d dst[MAX_MB_PLANE],
2250 const YV12_BUFFER_CONFIG *src,
2251 int mi_row, int mi_col,
2252 const struct scale_factors *scale,
2253 const struct scale_factors *scale_uv) {
2256 dst[0].buf = src->y_buffer;
2257 dst[0].stride = src->y_stride;
2258 dst[1].buf = src->u_buffer;
2259 dst[2].buf = src->v_buffer;
2260 dst[1].stride = dst[2].stride = src->uv_stride;
2262 dst[3].buf = src->alpha_buffer;
2263 dst[3].stride = src->alpha_stride;
2266 // TODO(jkoleszar): Make scale factors per-plane data
2267 for (i = 0; i < MAX_MB_PLANE; i++) {
2268 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2269 i ? scale_uv : scale,
2270 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2274 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2275 const TileInfo *const tile,
2276 int idx, MV_REFERENCE_FRAME frame_type,
2277 BLOCK_SIZE block_size,
2278 int mi_row, int mi_col,
2279 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2280 int_mv frame_near_mv[MAX_REF_FRAMES],
2281 struct buf_2d yv12_mb[4][MAX_MB_PLANE],
2282 struct scale_factors scale[MAX_REF_FRAMES]) {
2283 VP9_COMMON *cm = &cpi->common;
2284 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
2285 MACROBLOCKD *const xd = &x->e_mbd;
2286 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2288 // set up scaling factors
2289 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
2291 scale[frame_type].sfc->set_scaled_offsets(&scale[frame_type],
2292 mi_row * MI_SIZE, mi_col * MI_SIZE);
2294 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2295 // use the UV scaling factors.
2296 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
2297 &scale[frame_type], &scale[frame_type]);
2299 // Gets an initial list of candidate vectors from neighbours and orders them
2300 vp9_find_mv_refs(cm, xd, tile, xd->mi_8x8[0],
2303 mbmi->ref_mvs[frame_type], mi_row, mi_col);
2305 // Candidate refinement carried out at encoder and decoder
2306 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
2307 mbmi->ref_mvs[frame_type],
2308 &frame_nearest_mv[frame_type],
2309 &frame_near_mv[frame_type]);
2311 // Further refinement that is encode side only to test the top few candidates
2312 // in full and choose the best as the centre point for subsequent searches.
2313 // The current implementation doesn't support scaling.
2314 if (!vp9_is_scaled(scale[frame_type].sfc) && block_size >= BLOCK_8X8)
2315 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
2316 frame_type, block_size);
2319 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
2320 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
2321 int fb = get_ref_frame_idx(cpi, ref_frame);
2322 int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame);
2323 if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb])
2324 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]];
2325 return scaled_ref_frame;
2328 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2329 const MACROBLOCKD *const xd = &x->e_mbd;
2330 const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2331 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2332 return SWITCHABLE_INTERP_RATE_FACTOR *
2333 x->switchable_interp_costs[ctx][mbmi->interp_filter];
2336 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2337 const TileInfo *const tile,
2339 int mi_row, int mi_col,
2340 int_mv *tmp_mv, int *rate_mv) {
2341 MACROBLOCKD *xd = &x->e_mbd;
2342 VP9_COMMON *cm = &cpi->common;
2343 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2344 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2345 int bestsme = INT_MAX;
2346 int further_steps, step_param;
2347 int sadpb = x->sadperbit16;
2349 int ref = mbmi->ref_frame[0];
2350 int_mv ref_mv = mbmi->ref_mvs[ref][0];
2351 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2353 int tmp_col_min = x->mv_col_min;
2354 int tmp_col_max = x->mv_col_max;
2355 int tmp_row_min = x->mv_row_min;
2356 int tmp_row_max = x->mv_row_max;
2358 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
2360 if (scaled_ref_frame) {
2362 // Swap out the reference frame for a version that's been scaled to
2363 // match the resolution of the current frame, allowing the existing
2364 // motion search code to be used without additional modifications.
2365 for (i = 0; i < MAX_MB_PLANE; i++)
2366 backup_yv12[i] = xd->plane[i].pre[0];
2368 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2371 vp9_clamp_mv_min_max(x, &ref_mv.as_mv);
2373 // Adjust search parameters based on small partitions' result.
2375 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
2376 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
2377 // adjust search range
2382 // Get prediction MV.
2383 mvp_full.as_int = x->pred_mv[ref].as_int;
2385 // Adjust MV sign if needed.
2386 if (cm->ref_frame_sign_bias[ref]) {
2387 mvp_full.as_mv.col *= -1;
2388 mvp_full.as_mv.row *= -1;
2391 // Work out the size of the first step in the mv step search.
2392 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2393 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2394 // Take a weighted average of the step_params based on the last frame's
2395 // max mv magnitude and that based on the best ref mvs of the current
2396 // block for the given reference.
2397 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2398 cpi->mv_step_param) >> 1;
2400 step_param = cpi->mv_step_param;
2404 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2405 cpi->common.show_frame) {
2406 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2407 b_width_log2(bsize)));
2408 step_param = MAX(step_param, boffset);
2411 mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ?
2412 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int :
2413 x->pred_mv[ref].as_int;
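// MVs are stored at 1/8-pel precision; convert the prediction to full-pel
// units for the integer-pel search below.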
2415 mvp_full.as_mv.col >>= 3;
2416 mvp_full.as_mv.row >>= 3;
2418 // Further step/diamond searches as necessary
2419 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2421 if (cpi->sf.search_method == HEX) {
2422 bestsme = vp9_hex_search(x, &mvp_full.as_mv,
2425 &cpi->fn_ptr[block_size], 1,
2426 &ref_mv.as_mv, &tmp_mv->as_mv);
2427 } else if (cpi->sf.search_method == SQUARE) {
2428 bestsme = vp9_square_search(x, &mvp_full.as_mv,
2431 &cpi->fn_ptr[block_size], 1,
2432 &ref_mv.as_mv, &tmp_mv->as_mv);
2433 } else if (cpi->sf.search_method == BIGDIA) {
2434 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
2437 &cpi->fn_ptr[block_size], 1,
2438 &ref_mv.as_mv, &tmp_mv->as_mv);
2440 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2441 sadpb, further_steps, 1,
2442 &cpi->fn_ptr[block_size],
2446 x->mv_col_min = tmp_col_min;
2447 x->mv_col_max = tmp_col_max;
2448 x->mv_row_min = tmp_row_min;
2449 x->mv_row_max = tmp_row_max;
2451 if (bestsme < INT_MAX) {
2452 int dis; /* TODO: use dis in distortion calculation later. */
2454 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
2455 cm->allow_high_precision_mv,
2457 &cpi->fn_ptr[block_size],
2458 0, cpi->sf.subpel_iters_per_step,
2459 x->nmvjointcost, x->mvcost,
2462 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
2463 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2465 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
2466 x->pred_mv[ref].as_int = tmp_mv->as_int;
2468 if (scaled_ref_frame) {
2470 for (i = 0; i < MAX_MB_PLANE; i++)
2471 xd->plane[i].pre[0] = backup_yv12[i];
2475 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2478 int mi_row, int mi_col,
2479 int_mv single_newmv[MAX_REF_FRAMES],
2481 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
2482 MACROBLOCKD *xd = &x->e_mbd;
2483 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2484 const int refs[2] = { mbmi->ref_frame[0],
2485 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2487 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2489 // Prediction buffer from second frame.
2490 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2492 // Do joint motion search in compound mode to get more accurate mv.
2493 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2494 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2495 int last_besterr[2] = {INT_MAX, INT_MAX};
2496 YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2497 get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2498 get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2501 for (ref = 0; ref < 2; ++ref) {
2502 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2504 if (scaled_ref_frame[ref]) {
2506 // Swap out the reference frame for a version that's been scaled to
2507 // match the resolution of the current frame, allowing the existing
2508 // motion search code to be used without additional modifications.
2509 for (i = 0; i < MAX_MB_PLANE; i++)
2510 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2511 setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL);
2514 xd->scale_factor[ref].sfc->set_scaled_offsets(&xd->scale_factor[ref],
2516 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2519 // Allow joint search multiple times iteratively for each ref frame
2520 // and break out the search loop if it couldn't find better mv.
2521 for (ite = 0; ite < 4; ite++) {
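// Each iteration fixes the prediction from one reference (second_pred) and
// refines the other reference's MV against it.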
2522 struct buf_2d ref_yv12[2];
2523 int bestsme = INT_MAX;
2524 int sadpb = x->sadperbit16;
2526 int search_range = 3;
2528 int tmp_col_min = x->mv_col_min;
2529 int tmp_col_max = x->mv_col_max;
2530 int tmp_row_min = x->mv_row_min;
2531 int tmp_row_max = x->mv_row_max;
2534 // Initialized here because of compiler problem in Visual Studio.
2535 ref_yv12[0] = xd->plane[0].pre[0];
2536 ref_yv12[1] = xd->plane[0].pre[1];
2538 // Get pred block from second frame.
2539 vp9_build_inter_predictor(ref_yv12[!id].buf,
2540 ref_yv12[!id].stride,
2542 &frame_mv[refs[!id]].as_mv,
2543 &xd->scale_factor[!id],
2545 &xd->subpix, MV_PRECISION_Q3);
2547 // Compound motion search on first ref frame.
2549 xd->plane[0].pre[0] = ref_yv12[id];
2550 vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv);
2552 // Use mv result from single mode as mvp.
2553 tmp_mv.as_int = frame_mv[refs[id]].as_int;
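// Convert from 1/8-pel to full-pel units before the full-pixel refining search.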
2555 tmp_mv.as_mv.col >>= 3;
2556 tmp_mv.as_mv.row >>= 3;
2558 // Small-range full-pixel motion search
2559 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2561 &cpi->fn_ptr[block_size],
2562 x->nmvjointcost, x->mvcost,
2563 &ref_mv[id], second_pred,
2566 x->mv_col_min = tmp_col_min;
2567 x->mv_col_max = tmp_col_max;
2568 x->mv_row_min = tmp_row_min;
2569 x->mv_row_max = tmp_row_max;
2571 if (bestsme < INT_MAX) {
2572 int dis; /* TODO: use dis in distortion calculation later. */
2575 bestsme = cpi->find_fractional_mv_step_comp(
2578 cpi->common.allow_high_precision_mv,
2580 &cpi->fn_ptr[block_size],
2581 0, cpi->sf.subpel_iters_per_step,
2582 x->nmvjointcost, x->mvcost,
2583 &dis, &sse, second_pred,
2588 xd->plane[0].pre[0] = scaled_first_yv12;
2590 if (bestsme < last_besterr[id]) {
2591 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2592 last_besterr[id] = bestsme;
2600 for (ref = 0; ref < 2; ++ref) {
2601 if (scaled_ref_frame[ref]) {
2602 // restore the predictor
2604 for (i = 0; i < MAX_MB_PLANE; i++)
2605 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2608 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2609 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2610 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2613 vpx_free(second_pred);
2616 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2617 const TileInfo *const tile,
2619 int64_t txfm_cache[],
2620 int *rate2, int64_t *distortion,
2622 int *rate_y, int64_t *distortion_y,
2623 int *rate_uv, int64_t *distortion_uv,
2624 int *mode_excluded, int *disable_skip,
2625 INTERPOLATION_TYPE *best_filter,
2626 int_mv (*mode_mv)[MAX_REF_FRAMES],
2627 int mi_row, int mi_col,
2628 int_mv single_newmv[MAX_REF_FRAMES],
2630 const int64_t ref_best_rd) {
2631 VP9_COMMON *cm = &cpi->common;
2632 MACROBLOCKD *xd = &x->e_mbd;
2633 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2634 const int is_comp_pred = has_second_ref(mbmi);
2635 const int num_refs = is_comp_pred ? 2 : 1;
2636 const int this_mode = mbmi->mode;
2637 int_mv *frame_mv = mode_mv[this_mode];
2639 int refs[2] = { mbmi->ref_frame[0],
2640 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2642 int64_t this_rd = 0;
2643 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2644 int pred_exists = 0;
2646 int64_t rd, best_rd = INT64_MAX;
2647 int best_needs_copy = 0;
2648 uint8_t *orig_dst[MAX_MB_PLANE];
2649 int orig_dst_stride[MAX_MB_PLANE];
2653 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2654 frame_mv[refs[1]].as_int == INVALID_MV)
2658 if (this_mode == NEWMV) {
2661 // Initialize mv using single prediction mode result.
2662 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2663 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2665 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2666 joint_motion_search(cpi, x, bsize, frame_mv,
2667 mi_row, mi_col, single_newmv, &rate_mv);
2669 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2670 &mbmi->ref_mvs[refs[0]][0].as_mv,
2671 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2672 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2673 &mbmi->ref_mvs[refs[1]][0].as_mv,
2674 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2679 single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
2682 frame_mv[refs[0]].as_int =
2683 xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2684 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2688 // if we're near/nearest and mv == 0,0, compare to zeromv
2689 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2690 frame_mv[refs[0]].as_int == 0 &&
2691 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2692 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
2693 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
2694 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2695 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2696 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
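// Prefer the cheapest of NEARMV/NEARESTMV/ZEROMV when they all code the same
// zero motion; skip this mode if a cheaper one covers it.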
2698 if (this_mode == NEARMV) {
2701 } else if (this_mode == NEARESTMV) {
2705 assert(this_mode == ZEROMV);
2706 if (num_refs == 1) {
2708 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
2710 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
2714 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
2715 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
2717 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2718 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
2724 for (i = 0; i < num_refs; ++i) {
2725 cur_mv[i] = frame_mv[refs[i]];
2726 // Clip "next_nearest" so that it does not extend too far out of the image
2727 if (this_mode != NEWMV)
2728 clamp_mv2(&cur_mv[i].as_mv, xd);
2730 if (mv_check_bounds(x, &cur_mv[i]))
2732 mbmi->mv[i].as_int = cur_mv[i].as_int;
2735 // do first prediction into the destination buffer. Do the next
2736 // prediction into a temporary buffer. Then keep track of which one
2737 // of these currently holds the best predictor, and use the other
2738 // one for future predictions. In the end, copy from tmp_buf to
2739 // dst if necessary.
2740 for (i = 0; i < MAX_MB_PLANE; i++) {
2741 orig_dst[i] = xd->plane[i].dst.buf;
2742 orig_dst_stride[i] = xd->plane[i].dst.stride;
2745 /* We don't include the cost of the second reference here, because there
2746 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2747 * words if you present them in that order, the second one is always known
2748 * if the first is known */
2749 *rate2 += cost_mv_ref(cpi, this_mode,
2750 mbmi->mode_context[mbmi->ref_frame[0]]);
2752 if (!(*mode_excluded)) {
2754 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2756 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2761 // Are all MVs integer pel for Y and UV
2762 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2763 (mbmi->mv[0].as_mv.col & 15) == 0;
2765 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2766 (mbmi->mv[1].as_mv.col & 15) == 0;
2767 // Search for best switchable filter by checking the variance of
2768 // pred error irrespective of whether the filter will be used
2769 if (cm->mcomp_filter_type != BILINEAR) {
2770 *best_filter = EIGHTTAP;
2771 if (x->source_variance <
2772 cpi->sf.disable_filter_search_var_thresh) {
2773 *best_filter = EIGHTTAP;
2774 vp9_zero(cpi->rd_filter_cache);
2777 int tmp_rate_sum = 0;
2778 int64_t tmp_dist_sum = 0;
2780 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
2781 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2784 mbmi->interp_filter = i;
2785 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2786 rs = get_switchable_rate(x);
2787 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2789 if (i > 0 && intpel_mv) {
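// With MVs that are integer-pel for both Y and UV, all filters give the same
// prediction, so reuse the rate/distortion measured for the first filter.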
2790 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2791 tmp_rate_sum, tmp_dist_sum);
2792 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2793 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2794 cpi->rd_filter_cache[i] + rs_rd);
2795 rd = cpi->rd_filter_cache[i];
2796 if (cm->mcomp_filter_type == SWITCHABLE)
2800 int64_t dist_sum = 0;
2801 if ((cm->mcomp_filter_type == SWITCHABLE &&
2802 (!i || best_needs_copy)) ||
2803 (cm->mcomp_filter_type != SWITCHABLE &&
2804 (cm->mcomp_filter_type == mbmi->interp_filter ||
2805 (i == 0 && intpel_mv)))) {
2806 for (j = 0; j < MAX_MB_PLANE; j++) {
2807 xd->plane[j].dst.buf = orig_dst[j];
2808 xd->plane[j].dst.stride = orig_dst_stride[j];
2811 for (j = 0; j < MAX_MB_PLANE; j++) {
2812 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2813 xd->plane[j].dst.stride = 64;
2816 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2817 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2818 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2819 rate_sum, dist_sum);
2820 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2821 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2822 cpi->rd_filter_cache[i] + rs_rd);
2823 rd = cpi->rd_filter_cache[i];
2824 if (cm->mcomp_filter_type == SWITCHABLE)
2826 if (i == 0 && intpel_mv) {
2827 tmp_rate_sum = rate_sum;
2828 tmp_dist_sum = dist_sum;
2831 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2832 if (rd / 2 > ref_best_rd) {
2833 for (i = 0; i < MAX_MB_PLANE; i++) {
2834 xd->plane[i].dst.buf = orig_dst[i];
2835 xd->plane[i].dst.stride = orig_dst_stride[i];
2840 newbest = i == 0 || rd < best_rd;
2844 *best_filter = mbmi->interp_filter;
2845 if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
2846 best_needs_copy = !best_needs_copy;
2849 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2850 (cm->mcomp_filter_type != SWITCHABLE &&
2851 cm->mcomp_filter_type == mbmi->interp_filter)) {
2856 for (i = 0; i < MAX_MB_PLANE; i++) {
2857 xd->plane[i].dst.buf = orig_dst[i];
2858 xd->plane[i].dst.stride = orig_dst_stride[i];
2862 // Set the appropriate filter
2863 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2864 cm->mcomp_filter_type : *best_filter;
2865 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2866 rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
2869 if (best_needs_copy) {
2870 // again temporarily set the buffers to local memory to prevent a memcpy
2871 for (i = 0; i < MAX_MB_PLANE; i++) {
2872 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2873 xd->plane[i].dst.stride = 64;
2877 // Handles the special case when a filter that is not in the
2878 // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2879 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2883 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2886 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2887 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2888 // If the current pred_error modeled rd is substantially more than the best
2889 // so far, do not bother doing the full rd.
2890 if (rd / 2 > ref_best_rd) {
2891 for (i = 0; i < MAX_MB_PLANE; i++) {
2892 xd->plane[i].dst.buf = orig_dst[i];
2893 xd->plane[i].dst.stride = orig_dst_stride[i];
2899 if (cpi->common.mcomp_filter_type == SWITCHABLE)
2900 *rate2 += get_switchable_rate(x);
2902 if (!is_comp_pred && cpi->enable_encode_breakout) {
2903 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2905 else if (x->encode_breakout) {
2906 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2907 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2908 unsigned int var, sse;
2909 // Skipping threshold for ac.
2910 unsigned int thresh_ac;
2911 // The encode_breakout input
2912 unsigned int encode_breakout = x->encode_breakout << 4;
2913 unsigned int max_thresh = 36000;
2915 // Use an extremely low threshold for static frames to limit skipping.
2916 if (cpi->enable_encode_breakout == 2)
2919 // Calculate threshold according to dequant value.
2920 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2922 // Use encode_breakout input if it is bigger than internal threshold.
2923 if (thresh_ac < encode_breakout)
2924 thresh_ac = encode_breakout;
2926 // Set a maximum threshold to avoid a big PSNR loss in the low-bitrate case.
2927 if (thresh_ac > max_thresh)
2928 thresh_ac = max_thresh;
2930 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2931 xd->plane[0].dst.buf,
2932 xd->plane[0].dst.stride, &sse);
2934 // Adjust threshold according to partition size.
2935 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2936 b_height_log2_lookup[bsize]);
2938 // Y skipping condition checking
2939 if (sse < thresh_ac || sse == 0) {
2940 // Skipping threshold for dc
2941 unsigned int thresh_dc;
2943 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2945 // dc skipping checking
2946 if ((sse - var) < thresh_dc || sse == var) {
2947 unsigned int sse_u, sse_v;
2948 unsigned int var_u, var_v;
2950 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2951 x->plane[1].src.stride,
2952 xd->plane[1].dst.buf,
2953 xd->plane[1].dst.stride, &sse_u);
2955 // U skipping condition checking
2956 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2957 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2958 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2959 x->plane[2].src.stride,
2960 xd->plane[2].dst.buf,
2961 xd->plane[2].dst.stride, &sse_v);
2963 // V skipping condition checking
2964 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2965 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2968 // The cost of skip bit needs to be added.
2969 *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
2971 // Scaling factor for SSE from spatial domain to frequency domain
2972 // is 16. Adjust distortion accordingly.
2973 *distortion_uv = (sse_u + sse_v) << 4;
2974 *distortion = (sse << 4) + *distortion_uv;
2977 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2986 int skippable_y, skippable_uv;
2987 int64_t sseuv = INT64_MAX;
2988 int64_t rdcosty = INT64_MAX;
2990 // Y cost and distortion
2991 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2992 bsize, txfm_cache, ref_best_rd);
2994 if (*rate_y == INT_MAX) {
2996 *distortion = INT64_MAX;
2997 for (i = 0; i < MAX_MB_PLANE; i++) {
2998 xd->plane[i].dst.buf = orig_dst[i];
2999 xd->plane[i].dst.stride = orig_dst_stride[i];
3005 *distortion += *distortion_y;
3007 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
3008 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
3010 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
3011 bsize, ref_best_rd - rdcosty);
3012 if (*rate_uv == INT_MAX) {
3014 *distortion = INT64_MAX;
3015 for (i = 0; i < MAX_MB_PLANE; i++) {
3016 xd->plane[i].dst.buf = orig_dst[i];
3017 xd->plane[i].dst.stride = orig_dst_stride[i];
3024 *distortion += *distortion_uv;
3025 *skippable = skippable_y && skippable_uv;
3028 for (i = 0; i < MAX_MB_PLANE; i++) {
3029 xd->plane[i].dst.buf = orig_dst[i];
3030 xd->plane[i].dst.stride = orig_dst_stride[i];
3033 return this_rd; // if 0, this will be re-calculated by caller
3036 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
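// Ping-pong the coeff/qcoeff/dqcoeff/eob buffers between the macroblock planes
// and the pick-mode context so the best mode's data is kept without a copy.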
3038 struct macroblock_plane *const p = x->plane;
3039 struct macroblockd_plane *const pd = x->e_mbd.plane;
3041 for (i = 0; i < MAX_MB_PLANE; ++i) {
3042 p[i].coeff = ctx->coeff_pbuf[i][1];
3043 pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
3044 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3045 pd[i].eobs = ctx->eobs_pbuf[i][1];
3047 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
3048 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
3049 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3050 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
3052 ctx->coeff_pbuf[i][0] = p[i].coeff;
3053 ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
3054 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3055 ctx->eobs_pbuf[i][0] = pd[i].eobs;
3059 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3060 int *returnrate, int64_t *returndist,
3062 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3063 VP9_COMMON *const cm = &cpi->common;
3064 MACROBLOCKD *const xd = &x->e_mbd;
3065 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3066 int y_skip = 0, uv_skip = 0;
3067 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3070 xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3071 if (bsize >= BLOCK_8X8) {
3072 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3073 &dist_y, &y_skip, bsize, tx_cache,
3074 best_rd) >= best_rd) {
3075 *returnrate = INT_MAX;
3078 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3079 &dist_uv, &uv_skip, bsize);
3082 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3083 &dist_y, best_rd) >= best_rd) {
3084 *returnrate = INT_MAX;
3087 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3088 &dist_uv, &uv_skip, BLOCK_8X8);
3091 if (y_skip && uv_skip) {
3092 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3093 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
3094 *returndist = dist_y + dist_uv;
3095 vp9_zero(ctx->tx_rd_diff);
3098 *returnrate = rate_y + rate_uv +
3099 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
3100 *returndist = dist_y + dist_uv;
3101 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3102 for (i = 0; i < TX_MODES; i++) {
3103 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3104 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3106 ctx->tx_rd_diff[i] = 0;
3110 ctx->mic = *xd->mi_8x8[0];
3113 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3114 const TileInfo *const tile,
3115 int mi_row, int mi_col,
3117 int64_t *returndistortion,
3119 PICK_MODE_CONTEXT *ctx,
3120 int64_t best_rd_so_far) {
3121 VP9_COMMON *cm = &cpi->common;
3122 MACROBLOCKD *xd = &x->e_mbd;
3123 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3124 const struct segmentation *seg = &cm->seg;
3125 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3126 MB_PREDICTION_MODE this_mode;
3127 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3128 unsigned char segment_id = mbmi->segment_id;
3130 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3131 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3132 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3133 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3135 int idx_list[4] = {0,
3139 int64_t best_rd = best_rd_so_far;
3140 int64_t best_tx_rd[TX_MODES];
3141 int64_t best_tx_diff[TX_MODES];
3142 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3143 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3144 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3145 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3146 MB_MODE_INFO best_mbmode = { 0 };
3148 int mode_index, best_mode_index = 0;
3149 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3150 vp9_prob comp_mode_p;
3151 int64_t best_intra_rd = INT64_MAX;
3152 int64_t best_inter_rd = INT64_MAX;
3153 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3154 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3155 INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
3156 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3157 int64_t dist_uv[TX_SIZES];
3158 int skip_uv[TX_SIZES];
3159 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3160 struct scale_factors scale_factor[4];
3161 unsigned int ref_frame_mask = 0;
3162 unsigned int mode_mask = 0;
3163 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3164 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
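// Rate penalty applied below to intra modes other than DC_PRED and TM_PRED,
// scaled with the DC quantizer.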
3165 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3166 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3167 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3170 x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
3172 // Everywhere the flag is set the error is much higher than its neighbors.
3173 ctx->frames_with_high_error = 0;
3174 ctx->modes_with_high_error = 0;
3176 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3179 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3180 best_pred_rd[i] = INT64_MAX;
3181 for (i = 0; i < TX_MODES; i++)
3182 best_tx_rd[i] = INT64_MAX;
3183 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3184 best_filter_rd[i] = INT64_MAX;
3185 for (i = 0; i < TX_SIZES; i++)
3186 rate_uv_intra[i] = INT_MAX;
3188 *returnrate = INT_MAX;
3190 // Create a mask set to 1 for each reference frame used by a smaller
3192 if (cpi->sf.use_avoid_tested_higherror) {
3193 switch (block_size) {
3195 for (i = 0; i < 4; i++) {
3196 for (j = 0; j < 4; j++) {
3197 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
3198 mode_mask |= x->mb_context[i][j].modes_with_high_error;
3201 for (i = 0; i < 4; i++) {
3202 ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
3203 mode_mask |= x->sb32_context[i].modes_with_high_error;
3207 for (i = 0; i < 4; i++) {
3209 x->mb_context[x->sb_index][i].frames_with_high_error;
3210 mode_mask |= x->mb_context[x->sb_index][i].modes_with_high_error;
3214 // Until we handle all block sizes set it to present;
3219 ref_frame_mask = ~ref_frame_mask;
3220 mode_mask = ~mode_mask;
3223 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3224 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3225 setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
3226 block_size, mi_row, mi_col,
3227 frame_mv[NEARESTMV], frame_mv[NEARMV],
3228 yv12_mb, scale_factor);
3230 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3231 frame_mv[ZEROMV][ref_frame].as_int = 0;
3234 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3235 int mode_excluded = 0;
3236 int64_t this_rd = INT64_MAX;
3237 int disable_skip = 0;
3238 int compmode_cost = 0;
3239 int rate2 = 0, rate_y = 0, rate_uv = 0;
3240 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3242 int64_t tx_cache[TX_MODES];
3245 int64_t total_sse = INT_MAX;
3248 for (i = 0; i < TX_MODES; ++i)
3249 tx_cache[i] = INT64_MAX;
3252 this_mode = vp9_mode_order[mode_index].mode;
3253 ref_frame = vp9_mode_order[mode_index].ref_frame;
3254 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
3256 // Look at the reference frame of the best mode so far and set the
3257 // skip mask to look at a subset of the remaining modes.
3258 if (mode_index > cpi->sf.mode_skip_start) {
3259 if (mode_index == (cpi->sf.mode_skip_start + 1)) {
3260 switch (vp9_mode_order[best_mode_index].ref_frame) {
3262 cpi->mode_skip_mask = 0;
3265 cpi->mode_skip_mask = LAST_FRAME_MODE_MASK;
3268 cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK;
3271 cpi->mode_skip_mask = ALT_REF_MODE_MASK;
3274 case MAX_REF_FRAMES:
3275 assert(!"Invalid Reference frame");
3278 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3282 // Skip if the current reference frame has been masked off
3283 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3284 (cpi->ref_frame_mask & (1 << ref_frame)))
3287 // Test best rd so far against threshold for trying this mode.
3288 if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] *
3289 cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
3290 cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX)
3293 // Do not allow compound prediction if the segment level reference
3294 // frame feature is in use as in this case there can only be one reference.
3295 if ((second_ref_frame > INTRA_FRAME) &&
3296 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3299 // Skip some checking based on small partitions' result.
3300 if (x->fast_ms > 1 && !ref_frame)
3302 if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
3305 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) {
3306 if (!(ref_frame_mask & (1 << ref_frame))) {
3309 if (!(mode_mask & (1 << this_mode))) {
3312 if (second_ref_frame != NONE
3313 && !(ref_frame_mask & (1 << second_ref_frame))) {
3318 mbmi->ref_frame[0] = ref_frame;
3319 mbmi->ref_frame[1] = second_ref_frame;
3321 if (!(ref_frame == INTRA_FRAME
3322 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3325 if (!(second_ref_frame == NONE
3326 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3330 comp_pred = second_ref_frame > INTRA_FRAME;
3332 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3333 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3335 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3336 if (ref_frame != best_inter_ref_frame &&
3337 second_ref_frame != best_inter_ref_frame)
3341 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3342 mbmi->uv_mode = DC_PRED;
3344 // Evaluate all sub-pel filters irrespective of whether we can use
3345 // them for this frame.
3346 mbmi->interp_filter = cm->mcomp_filter_type;
3347 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
3350 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3352 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3354 mode_excluded = mode_excluded
3356 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
3358 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3361 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
3365 // Select prediction reference frames.
3366 for (i = 0; i < MAX_MB_PLANE; i++) {
3367 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3369 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3372 // If the segment reference frame feature is enabled....
3373 // then do nothing if the current ref frame is not allowed..
3374 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3375 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3378 // If the segment skip feature is enabled....
3379 // then do nothing if the current mode is not allowed..
3380 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3381 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3383 // Disable this drop out case if the ref frame
3384 // segment level feature is enabled for this segment. This is to
3385 // prevent the possibility that we end up unable to pick any mode.
3386 } else if (!vp9_segfeature_active(seg, segment_id,
3387 SEG_LVL_REF_FRAME)) {
3388 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3389 // unless ARNR filtering is enabled in which case we want
3390 // an unfiltered alternative. We allow near/nearest as well
3391 // because they may result in zero-zero MVs but be cheaper.
3392 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3393 if ((this_mode != ZEROMV &&
3394 !(this_mode == NEARMV &&
3395 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3396 !(this_mode == NEARESTMV &&
3397 frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
3398 ref_frame != ALTREF_FRAME) {
3403 // TODO(JBB): This is to make up for the fact that we don't have sad
3404 // functions that work when the block size reads outside the umv. We
3405 // should fix this either by making the motion search just work on
3406 // a representative block in the boundary (first) and then implementing a
3407 // function that does sads when inside the border.
3408 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
3409 this_mode == NEWMV) {
3413 #ifdef MODE_TEST_HIT_STATS
3415 // Keep a record of the number of test hits at each size
3416 cpi->mode_test_hits[bsize]++;
3420 if (ref_frame == INTRA_FRAME) {
3422 // Disable intra modes other than DC_PRED for blocks with low variance
3423 // Threshold for intra skipping based on source variance
3424 // TODO(debargha): Specialize the threshold for super block sizes
3425 static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
3426 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
3428 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3429 this_mode != DC_PRED &&
3430 x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
3432 // Only search the oblique modes if the best so far is
3433 // one of the neighboring directional modes
3434 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3435 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3436 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
3439 mbmi->mode = this_mode;
3440 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3441 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3445 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3446 bsize, tx_cache, best_rd);
3448 if (rate_y == INT_MAX)
3451 uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
3452 if (rate_uv_intra[uv_tx] == INT_MAX) {
3453 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
3454 &rate_uv_tokenonly[uv_tx],
3455 &dist_uv[uv_tx], &skip_uv[uv_tx],
3459 rate_uv = rate_uv_tokenonly[uv_tx];
3460 distortion_uv = dist_uv[uv_tx];
3461 skippable = skippable && skip_uv[uv_tx];
3462 mbmi->uv_mode = mode_uv[uv_tx];
3464 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3465 if (this_mode != DC_PRED && this_mode != TM_PRED)
3466 rate2 += intra_cost_penalty;
3467 distortion2 = distortion_y + distortion_uv;
3469 mbmi->mode = this_mode;
3470 compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
3471 this_rd = handle_inter_mode(cpi, x, tile, bsize,
3473 &rate2, &distortion2, &skippable,
3474 &rate_y, &distortion_y,
3475 &rate_uv, &distortion_uv,
3476 &mode_excluded, &disable_skip,
3477 &tmp_best_filter, frame_mv,
3479 single_newmv, &total_sse, best_rd);
3480 if (this_rd == INT64_MAX)
3484 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
3485 rate2 += compmode_cost;
3488 // Estimate the reference frame signaling cost and add it
3489 // to the rolling cost variable.
3490 if (second_ref_frame > INTRA_FRAME) {
3491 rate2 += ref_costs_comp[ref_frame];
3493 rate2 += ref_costs_single[ref_frame];
3496 if (!disable_skip) {
3497 // Test for the condition where skip block will be activated
3498 // because there are no non-zero coefficients and make any
3499 // necessary adjustment for rate. Ignore if skip is coded at
3500 // segment level as the cost won't have been added in.
3501 // Is Mb level skip allowed (i.e. not coded at segment level).
3502 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3506 // Back out the coefficient coding costs
3507 rate2 -= (rate_y + rate_uv);
3508 // for best yrd calculation
3511 if (mb_skip_allowed) {
3514 // Cost the skip mb case
3515 vp9_prob skip_prob =
3516 vp9_get_pred_prob_mbskip(cm, xd);
3519 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3520 rate2 += prob_skip_cost;
3523 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3524 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3525 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3526 // Add in the cost of the no skip flag.
3527 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3529 rate2 += prob_skip_cost;
3531 // FIXME(rbultje) make this work for splitmv also
3532 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3534 rate2 += prob_skip_cost;
3535 distortion2 = total_sse;
3536 assert(total_sse >= 0);
3537 rate2 -= (rate_y + rate_uv);
3542 } else if (mb_skip_allowed) {
3543 // Add in the cost of the no skip flag.
3544 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3546 rate2 += prob_skip_cost;
3549 // Calculate the final RD estimate for this mode.
3550 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3553 // Keep record of best intra rd
3554 if (!is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3555 this_rd < best_intra_rd) {
3556 best_intra_rd = this_rd;
3557 best_intra_mode = xd->mi_8x8[0]->mbmi.mode;
3560 // Keep record of best inter rd with single reference
3561 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3562 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
3563 !mode_excluded && this_rd < best_inter_rd) {
3564 best_inter_rd = this_rd;
3565 best_inter_ref_frame = ref_frame;
3568 if (!disable_skip && ref_frame == INTRA_FRAME) {
3569 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3570 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3571 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3572 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3575 // Store the respective mode distortions for later use.
3576 if (mode_distortions[this_mode] == -1
3577 || distortion2 < mode_distortions[this_mode]) {
3578 mode_distortions[this_mode] = distortion2;
3580 if (frame_distortions[ref_frame] == -1
3581 || distortion2 < frame_distortions[ref_frame]) {
3582 frame_distortions[ref_frame] = distortion2;
3585 // Did this mode help, i.e. is it the new best mode?
3586 if (this_rd < best_rd || x->skip) {
3587 if (!mode_excluded) {
3588 // Note index of best mode so far
3589 best_mode_index = mode_index;
3591 if (ref_frame == INTRA_FRAME) {
3592 /* required for left and above block mv */
3593 mbmi->mv[0].as_int = 0;
3596 *returnrate = rate2;
3597 *returndistortion = distortion2;
3599 best_mbmode = *mbmi;
3600 best_skip2 = this_skip2;
3601 if (!x->select_txfm_size)
3602 swap_block_ptr(x, ctx);
3603 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3604 sizeof(uint8_t) * ctx->num_4x4_blk);
3606 // TODO(debargha): enhance this test with a better distortion prediction
3607 // based on qp, activity mask and history
3608 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3609 (mode_index > MIN_EARLY_TERM_INDEX)) {
3610 const int qstep = xd->plane[0].dequant[1];
3611 // TODO(debargha): Enhance this by specializing for each mode_index
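// If this inter mode's distortion, even scaled up, is still below the squared
// quantization step, later modes are unlikely to improve the RD cost.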
3613 if (x->source_variance < UINT_MAX) {
3614 const int var_adjust = (x->source_variance < 16);
3615 scale -= var_adjust;
3617 if (ref_frame > INTRA_FRAME &&
3618 distortion2 * scale < qstep * qstep) {
3625 /* keep record of best compound/single-only prediction */
3626 if (!disable_skip && ref_frame != INTRA_FRAME) {
3627 int single_rd, hybrid_rd, single_rate, hybrid_rate;
3629 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
3630 single_rate = rate2 - compmode_cost;
3631 hybrid_rate = rate2;
3633 single_rate = rate2;
3634 hybrid_rate = rate2 + compmode_cost;
3637 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3638 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3640 if (second_ref_frame <= INTRA_FRAME &&
3641 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
3642 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
3643 } else if (second_ref_frame > INTRA_FRAME &&
3644 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
3645 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
3647 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
3648 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
3651 /* keep record of best filter type */
3652 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3653 cm->mcomp_filter_type != BILINEAR) {
3654 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
3655 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
3656 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3658 // In cases of poor prediction, filter_cache[] can contain really big
3659 // values, which actually are bigger than this_rd itself. This can
3660 // cause negative best_filter_rd[] values, which is obviously silly.
3661 // Therefore, if filter_cache < ref, we do an adjusted calculation.
3662 if (cpi->rd_filter_cache[i] >= ref) {
3663 adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
3665 // FIXME(rbultje) do this for comppred also
3667 // To prevent out-of-range computation in
3668 // adj_rd = cpi->rd_filter_cache[i] * this_rd / ref
3669 // cpi->rd_filter_cache[i] / ref is converted to a 256 based ratio.
3670 int tmp = cpi->rd_filter_cache[i] * 256 / ref;
3671 adj_rd = (this_rd * tmp) >> 8;
3673 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3677 /* keep record of best txfm size */
3678 if (bsize < BLOCK_32X32) {
3679 if (bsize < BLOCK_16X16)
3680 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3682 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3684 if (!mode_excluded && this_rd != INT64_MAX) {
3685 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3686 int64_t adj_rd = INT64_MAX;
3687 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3689 if (adj_rd < best_tx_rd[i])
3690 best_tx_rd[i] = adj_rd;
3697 if (x->skip && !comp_pred)
3701 if (best_rd >= best_rd_so_far)
3704 // If we used an estimate for the uv intra rd in the loop above...
3705 if (cpi->sf.use_uv_intra_rd_estimate) {
3706 // Do Intra UV best rd mode selection if best mode choice above was intra.
3707 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
3708 TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
3709 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
3710 &rate_uv_tokenonly[uv_tx_size],
3711 &dist_uv[uv_tx_size],
3712 &skip_uv[uv_tx_size],
3713 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
3717 // If we are using reference masking and the set mask flag is set then
3718 // create the reference frame mask.
3719 if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
3720 cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);
3722 // Flag all modes that have a distortion that is > 2x the best we found at this level.
3724 for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3725 if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3728 if (mode_distortions[mode_index] > 2 * *returndistortion) {
3729 ctx->modes_with_high_error |= (1 << mode_index);
3733 // Flag all ref frames that have a distortion that is > 2x the best we found at this level.
3735 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3736 if (frame_distortions[ref_frame] > 2 * *returndistortion) {
3737 ctx->frames_with_high_error |= (1 << ref_frame);
3741 assert((cm->mcomp_filter_type == SWITCHABLE) ||
3742 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
3743 (best_mbmode.ref_frame[0] == INTRA_FRAME));
3745 // Updating rd_thresh_freq_fact[] here means that the different
3746 // partition/block sizes are handled independently based on the best
3747 // choice for the current partition. It may well be better to keep a scaled
3748 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3749 // combination that wins out.
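// The winning mode's frequency factor decays by 1/8 of its current value while
// every other mode's factor grows by RD_THRESH_INC, capped at
// adaptive_rd_thresh * RD_THRESH_MAX_FACT; e.g. a factor of 32 drops to 28 for
// the winner.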
3750 if (cpi->sf.adaptive_rd_thresh) {
3751 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3752 if (mode_index == best_mode_index) {
3753 cpi->rd_thresh_freq_fact[bsize][mode_index] -=
3754 (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
3756 cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
3757 if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
3758 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
3759 cpi->rd_thresh_freq_fact[bsize][mode_index] =
3760 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
3767 *mbmi = best_mbmode;
3768 x->skip |= best_skip2;
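// The diffs below record how much worse each prediction type, interpolation
// filter and transform mode would have been than the overall best RD. They are
// saved with the coding context so the frame-level prediction-mode, filter and
// tx-mode decisions can take them into account.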
3770 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
3771 if (best_pred_rd[i] == INT64_MAX)
3772 best_pred_diff[i] = INT_MIN;
3774 best_pred_diff[i] = best_rd - best_pred_rd[i];
3778 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3779 if (best_filter_rd[i] == INT64_MAX)
3780 best_filter_diff[i] = 0;
3782 best_filter_diff[i] = best_rd - best_filter_rd[i];
3784 if (cm->mcomp_filter_type == SWITCHABLE)
3785 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3787 vp9_zero(best_filter_diff);
3791 for (i = 0; i < TX_MODES; i++) {
3792 if (best_tx_rd[i] == INT64_MAX)
3793 best_tx_diff[i] = 0;
3795 best_tx_diff[i] = best_rd - best_tx_rd[i];
3798 vp9_zero(best_tx_diff);
3801 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
3803 store_coding_context(x, ctx, best_mode_index,
3804 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3805 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3806 mbmi->ref_frame[1]][0],
3807 best_pred_diff, best_tx_diff, best_filter_diff);
3813 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3814 const TileInfo *const tile,
3815 int mi_row, int mi_col,
3817 int64_t *returndistortion,
3819 PICK_MODE_CONTEXT *ctx,
3820 int64_t best_rd_so_far) {
3821 VP9_COMMON *cm = &cpi->common;
3822 MACROBLOCKD *xd = &x->e_mbd;
3823 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3824 const struct segmentation *seg = &cm->seg;
3825 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3826 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3827 unsigned char segment_id = mbmi->segment_id;
3829 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3830 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3831 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3833 int idx_list[4] = {0,
3837 int64_t best_rd = best_rd_so_far;
3838 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3839 int64_t best_tx_rd[TX_MODES];
3840 int64_t best_tx_diff[TX_MODES];
3841 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3842 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3843 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3844 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3845 MB_MODE_INFO best_mbmode = { 0 };
3846 int mode_index, best_mode_index = 0;
3847 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3848 vp9_prob comp_mode_p;
3849 int64_t best_inter_rd = INT64_MAX;
3850 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3851 INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
3852 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3853 int64_t dist_uv[TX_SIZES];
3854 int skip_uv[TX_SIZES];
3855 MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
3856 struct scale_factors scale_factor[4];
3857 unsigned int ref_frame_mask = 0;
3858 unsigned int mode_mask = 0;
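// Penalise intra modes in inter frames in proportion to the DC quantiser step,
// so the search is biased towards inter prediction as Q rises.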
3859 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
3860 cpi->common.y_dc_delta_q);
3861 int_mv seg_mvs[4][MAX_REF_FRAMES];
3862 b_mode_info best_bmodes[4];
3865 x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
3866 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3868 for (i = 0; i < 4; i++) {
3870 for (j = 0; j < MAX_REF_FRAMES; j++)
3871 seg_mvs[i][j].as_int = INVALID_MV;
3874 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3877 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3878 best_pred_rd[i] = INT64_MAX;
3879 for (i = 0; i < TX_MODES; i++)
3880 best_tx_rd[i] = INT64_MAX;
3881 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3882 best_filter_rd[i] = INT64_MAX;
3883 for (i = 0; i < TX_SIZES; i++)
3884 rate_uv_intra[i] = INT_MAX;
3886 *returnrate = INT_MAX;
3888 // Create a mask set to 1 for each reference frame used by a smaller
3890 if (cpi->sf.use_avoid_tested_higherror) {
3893 ref_frame_mask = ~ref_frame_mask;
3894 mode_mask = ~mode_mask;
3897 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3898 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3899 setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
3900 block_size, mi_row, mi_col,
3901 frame_mv[NEARESTMV], frame_mv[NEARMV],
3902 yv12_mb, scale_factor);
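// Until a motion search has run, NEWMV has no valid vector, so it is primed
// with INVALID_MV; ZEROMV is by definition the (0, 0) vector.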
3904 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3905 frame_mv[ZEROMV][ref_frame].as_int = 0;
3908 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
3909 int mode_excluded = 0;
3910 int64_t this_rd = INT64_MAX;
3911 int disable_skip = 0;
3912 int compmode_cost = 0;
3913 int rate2 = 0, rate_y = 0, rate_uv = 0;
3914 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3916 int64_t tx_cache[TX_MODES];
3919 int64_t total_sse = INT_MAX;
3922 for (i = 0; i < TX_MODES; ++i)
3923 tx_cache[i] = INT64_MAX;
3926 ref_frame = vp9_ref_order[mode_index].ref_frame;
3927 second_ref_frame = vp9_ref_order[mode_index].second_ref_frame;
3929 // Look at the reference frame of the best mode so far and set the
3930 // skip mask to look at a subset of the remaining modes.
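// Each bit of mode_skip_mask corresponds to an entry in vp9_ref_order, so the
// masks chosen below exclude reference-frame combinations that are unlikely to
// beat the best mode found so far.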
3931 if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3932 if (mode_index == 3) {
3933 switch (vp9_ref_order[best_mode_index].ref_frame) {
3935 cpi->mode_skip_mask = 0;
3938 cpi->mode_skip_mask = 0x0010;
3941 cpi->mode_skip_mask = 0x0008;
3944 cpi->mode_skip_mask = 0x0000;
3947 case MAX_REF_FRAMES:
3948 assert(!"Invalid Reference frame");
3951 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3955 // Skip if the current reference frame has been masked off
3956 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3957 (cpi->ref_frame_mask & (1 << ref_frame)))
3960 // Test best rd so far against threshold for trying this mode.
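// The per-mode threshold is scaled by rd_thresh_freq_sub8x8[] in 1/32 units
// (note the >> 5); modes that keep losing see their factor grow, so they are
// attempted less and less often.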
3962 ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
3963 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
3964 cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
3967 // Do not allow compound prediction if the segment level reference
3968 // frame feature is in use as in this case there can only be one reference.
3969 if ((second_ref_frame > INTRA_FRAME) &&
3970 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3973 mbmi->ref_frame[0] = ref_frame;
3974 mbmi->ref_frame[1] = second_ref_frame;
3976 if (!(ref_frame == INTRA_FRAME
3977 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3980 if (!(second_ref_frame == NONE
3981 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3985 comp_pred = second_ref_frame > INTRA_FRAME;
3987 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3988 if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME)
3990 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3991 if (ref_frame != best_inter_ref_frame &&
3992 second_ref_frame != best_inter_ref_frame)
3996 // TODO(jingning, jkoleszar): scaling reference frame not supported for SPLITMV.
3998 if (ref_frame > 0 &&
3999 vp9_is_scaled(scale_factor[ref_frame].sfc))
4002 if (second_ref_frame > 0 &&
4003 vp9_is_scaled(scale_factor[second_ref_frame].sfc))
4006 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
4007 mbmi->uv_mode = DC_PRED;
4009 // Evaluate all sub-pel filters irrespective of whether we can use
4010 // them for this frame.
4011 mbmi->interp_filter = cm->mcomp_filter_type;
4012 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4015 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4017 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
4019 mode_excluded = mode_excluded
4021 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
4023 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
4026 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
4030 // Select prediction reference frames.
4031 for (i = 0; i < MAX_MB_PLANE; i++) {
4032 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4034 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4037 // If the segment reference frame feature is enabled, do nothing if the
4038 // current ref frame is not allowed.
4039 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4040 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
4043 // If the segment skip feature is enabled, do nothing if the current mode
4044 // is not allowed.
4045 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
4046 ref_frame != INTRA_FRAME) {
4048 // Disable this drop out case if the ref frame
4049 // segment level feature is enabled for this segment. This is to
4050 // prevent the possibility that we end up unable to pick any mode.
4051 } else if (!vp9_segfeature_active(seg, segment_id,
4052 SEG_LVL_REF_FRAME)) {
4053 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4054 // unless ARNR filtering is enabled in which case we want
4055 // an unfiltered alternative. We allow near/nearest as well
4056 // because they may result in zero-zero MVs but be cheaper.
4057 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
4061 #ifdef MODE_TEST_HIT_STATS
4063 // Keep a record of the number of test hits at each size
4064 cpi->mode_test_hits[bsize]++;
4067 if (ref_frame == INTRA_FRAME) {
4069 mbmi->tx_size = TX_4X4;
4070 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
4071 &distortion_y, best_rd) >= best_rd)
4074 rate2 += intra_cost_penalty;
4075 distortion2 += distortion_y;
4077 if (rate_uv_intra[TX_4X4] == INT_MAX) {
4078 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
4079 &rate_uv_tokenonly[TX_4X4],
4080 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
4083 rate2 += rate_uv_intra[TX_4X4];
4084 rate_uv = rate_uv_tokenonly[TX_4X4];
4085 distortion2 += dist_uv[TX_4X4];
4086 distortion_uv = dist_uv[TX_4X4];
4087 mbmi->uv_mode = mode_uv[TX_4X4];
4088 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
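// Sub-8x8 blocks only ever use the 4x4 transform, so every tx_mode entry
// shares the ONLY_4X4 cost.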
4089 for (i = 0; i < TX_MODES; ++i)
4090 tx_cache[i] = tx_cache[ONLY_4X4];
4094 int64_t this_rd_thresh;
4095 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4096 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4097 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
4098 int tmp_best_skippable = 0;
4099 int switchable_filter_index;
4100 int_mv *second_ref = comp_pred ?
4101 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
4102 b_mode_info tmp_best_bmodes[16];
4103 MB_MODE_INFO tmp_best_mbmode;
4104 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4105 int pred_exists = 0;
4108 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4109 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
4110 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
4111 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4112 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4113 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
4115 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
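// When the source block has very low variance the per-filter search below is
// skipped and EIGHTTAP is used directly; presumably the filter choice matters
// little on flat content.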
4116 if (cm->mcomp_filter_type != BILINEAR) {
4117 tmp_best_filter = EIGHTTAP;
4118 if (x->source_variance <
4119 cpi->sf.disable_filter_search_var_thresh) {
4120 tmp_best_filter = EIGHTTAP;
4121 vp9_zero(cpi->rd_filter_cache);
4123 for (switchable_filter_index = 0;
4124 switchable_filter_index < SWITCHABLE_FILTERS;
4125 ++switchable_filter_index) {
4128 mbmi->interp_filter = switchable_filter_index;
4129 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4131 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4132 &mbmi->ref_mvs[ref_frame][0],
4135 &rate, &rate_y, &distortion,
4136 &skippable, &total_sse,
4137 (int)this_rd_thresh, seg_mvs,
4138 bsi, switchable_filter_index,
4141 if (tmp_rd == INT64_MAX)
4143 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
4144 rs = get_switchable_rate(x);
4145 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4146 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
4147 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
4149 if (cm->mcomp_filter_type == SWITCHABLE)
4152 newbest = (tmp_rd < tmp_best_rd);
4154 tmp_best_filter = mbmi->interp_filter;
4155 tmp_best_rd = tmp_rd;
4157 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
4158 (mbmi->interp_filter == cm->mcomp_filter_type &&
4159 cm->mcomp_filter_type != SWITCHABLE)) {
4160 tmp_best_rdu = tmp_rd;
4161 tmp_best_rate = rate;
4162 tmp_best_ratey = rate_y;
4163 tmp_best_distortion = distortion;
4164 tmp_best_sse = total_sse;
4165 tmp_best_skippable = skippable;
4166 tmp_best_mbmode = *mbmi;
4167 for (i = 0; i < 4; i++) {
4168 tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4169 x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i];
4172 if (switchable_filter_index == 0 &&
4173 cpi->sf.use_rd_breakout &&
4174 best_rd < INT64_MAX) {
4175 if (tmp_best_rdu / 2 > best_rd) {
4176 // skip searching the other filters if the first is
4177 // already substantially larger than the best so far
4178 tmp_best_filter = mbmi->interp_filter;
4179 tmp_best_rdu = INT64_MAX;
4184 } // switchable_filter_index loop
4188 if (tmp_best_rdu == INT64_MAX)
4191 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
4192 tmp_best_filter : cm->mcomp_filter_type);
4193 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4195 // Handles the special case when a filter that is not in the
4196 // switchable list (bilinear, 6-tap) is indicated at the frame level
4197 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4198 &mbmi->ref_mvs[ref_frame][0],
4201 &rate, &rate_y, &distortion,
4202 &skippable, &total_sse,
4203 (int)this_rd_thresh, seg_mvs,
4206 if (tmp_rd == INT64_MAX)
4209 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
4210 int rs = get_switchable_rate(x);
4211 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
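// The switchable-filter signalling rate was folded into the RD values during
// the segmentation search above; it is removed here and re-added further down
// (rate2 += get_switchable_rate(x)) once the final filter is known.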
4213 tmp_rd = tmp_best_rdu;
4214 total_sse = tmp_best_sse;
4215 rate = tmp_best_rate;
4216 rate_y = tmp_best_ratey;
4217 distortion = tmp_best_distortion;
4218 skippable = tmp_best_skippable;
4219 *mbmi = tmp_best_mbmode;
4220 for (i = 0; i < 4; i++)
4221 xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
4225 distortion2 += distortion;
4227 if (cpi->common.mcomp_filter_type == SWITCHABLE)
4228 rate2 += get_switchable_rate(x);
4230 if (!mode_excluded) {
4232 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
4234 mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
4236 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4238 tmp_best_rdu = best_rd -
4239 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4240 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
4242 if (tmp_best_rdu > 0) {
4243 // If even the 'Y' rd value of split is higher than best so far
4244 // then don't bother looking at UV
4245 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4247 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4248 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4249 if (rate_uv == INT_MAX)
4252 distortion2 += distortion_uv;
4253 skippable = skippable && uv_skippable;
4254 total_sse += uv_sse;
4256 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4257 for (i = 0; i < TX_MODES; ++i)
4258 tx_cache[i] = tx_cache[ONLY_4X4];
4262 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
4263 rate2 += compmode_cost;
4266 // Estimate the reference frame signaling cost and add it
4267 // to the rolling cost variable.
4268 if (second_ref_frame > INTRA_FRAME) {
4269 rate2 += ref_costs_comp[ref_frame];
4271 rate2 += ref_costs_single[ref_frame];
4274 if (!disable_skip) {
4275 // Test for the condition where skip block will be activated
4276 // because there are no non-zero coefficients and make any
4277 // necessary adjustment for rate. Ignore if skip is coded at
4278 // segment level as the cost won't have been added in.
4279 // Is MB level skip allowed (i.e. not coded at segment level).
4280 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4283 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4284 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4285 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4286 // Add in the cost of the no skip flag.
4287 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
4289 rate2 += prob_skip_cost;
4291 // FIXME(rbultje) make this work for splitmv also
4292 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
4294 rate2 += prob_skip_cost;
4295 distortion2 = total_sse;
4296 assert(total_sse >= 0);
4297 rate2 -= (rate_y + rate_uv);
4302 } else if (mb_skip_allowed) {
4303 // Add in the cost of the no skip flag.
4304 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
4306 rate2 += prob_skip_cost;
4309 // Calculate the final RD estimate for this mode.
4310 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4313 // Keep record of best inter rd with single reference
4314 if (xd->mi_8x8[0]->mbmi.ref_frame[0] > INTRA_FRAME &&
4315 xd->mi_8x8[0]->mbmi.ref_frame[1] == NONE &&
4317 this_rd < best_inter_rd) {
4318 best_inter_rd = this_rd;
4319 best_inter_ref_frame = ref_frame;
4322 if (!disable_skip && ref_frame == INTRA_FRAME) {
4323 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
4324 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4325 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4326 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4329 // Did this mode help, i.e. is it the new best mode?
4330 if (this_rd < best_rd || x->skip) {
4331 if (!mode_excluded) {
4332 // Note index of best mode so far
4333 best_mode_index = mode_index;
4335 if (ref_frame == INTRA_FRAME) {
4336 /* required for left and above block mv */
4337 mbmi->mv[0].as_int = 0;
4340 *returnrate = rate2;
4341 *returndistortion = distortion2;
4343 best_yrd = best_rd -
4344 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4345 best_mbmode = *mbmi;
4346 best_skip2 = this_skip2;
4347 if (!x->select_txfm_size)
4348 swap_block_ptr(x, ctx);
4349 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
4350 sizeof(uint8_t) * ctx->num_4x4_blk);
4352 for (i = 0; i < 4; i++)
4353 best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4355 // TODO(debargha): enhance this test with a better distortion prediction
4356 // based on qp, activity mask and history
4357 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4358 (mode_index > MIN_EARLY_TERM_INDEX)) {
4359 const int qstep = xd->plane[0].dequant[1];
4360 // TODO(debargha): Enhance this by specializing for each mode_index
4362 if (x->source_variance < UINT_MAX) {
4363 const int var_adjust = (x->source_variance < 16);
4364 scale -= var_adjust;
4366 if (ref_frame > INTRA_FRAME &&
4367 distortion2 * scale < qstep * qstep) {
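// If the scaled distortion is already below the square of the quantiser step,
// the residual is essentially at noise level and later (higher-index) modes
// are unlikely to do better, so the mode loop can terminate early.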
4374 /* keep record of best compound/single-only prediction */
4375 if (!disable_skip && ref_frame != INTRA_FRAME) {
4376 int single_rd, hybrid_rd, single_rate, hybrid_rate;
4378 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
4379 single_rate = rate2 - compmode_cost;
4380 hybrid_rate = rate2;
4382 single_rate = rate2;
4383 hybrid_rate = rate2 + compmode_cost;
4386 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4387 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4389 if (second_ref_frame <= INTRA_FRAME &&
4390 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
4391 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
4392 } else if (second_ref_frame > INTRA_FRAME &&
4393 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
4394 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
4396 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
4397 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
4400 /* keep record of best filter type */
4401 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4402 cm->mcomp_filter_type != BILINEAR) {
4403 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
4404 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
4405 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4407 // In cases of poor prediction, filter_cache[] can contain really big
4408 // values, which actually are bigger than this_rd itself. This can
4409 // cause negative best_filter_rd[] values, which is obviously silly.
4410 // Therefore, if filter_cache < ref, we do an adjusted calculation.
4411 if (cpi->rd_filter_cache[i] >= ref)
4412 adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
4413 else // FIXME(rbultje) do this for comppred also
4414 adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
4415 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4419 /* keep record of best txfm size */
4420 if (bsize < BLOCK_32X32) {
4421 if (bsize < BLOCK_16X16) {
4422 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
4423 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
4425 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
4427 if (!mode_excluded && this_rd != INT64_MAX) {
4428 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
4429 int64_t adj_rd = INT64_MAX;
4430 if (ref_frame > INTRA_FRAME)
4431 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
4435 if (adj_rd < best_tx_rd[i])
4436 best_tx_rd[i] = adj_rd;
4443 if (x->skip && !comp_pred)
4447 if (best_rd >= best_rd_so_far)
4450 // If we used an estimate for the uv intra rd in the loop above...
4451 if (cpi->sf.use_uv_intra_rd_estimate) {
4452 // Do Intra UV best rd mode selection if best mode choice above was intra.
4453 if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
4454 TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
4455 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
4456 &rate_uv_tokenonly[uv_tx_size],
4457 &dist_uv[uv_tx_size],
4458 &skip_uv[uv_tx_size],
4463 // If we are using reference masking and the set mask flag is set then
4464 // create the reference frame mask.
4465 if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
4466 cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame);
4468 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4469 *returnrate = INT_MAX;
4470 *returndistortion = INT_MAX;
4474 assert((cm->mcomp_filter_type == SWITCHABLE) ||
4475 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
4476 (best_mbmode.ref_frame[0] == INTRA_FRAME));
4478 // Updating rd_thresh_freq_fact[] here means that the different
4479 // partition/block sizes are handled independently based on the best
4480 // choice for the current partition. It may well be better to keep a scaled
4481 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4482 // combination that wins out.
4483 if (cpi->sf.adaptive_rd_thresh) {
4484 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
4485 if (mode_index == best_mode_index) {
4486 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -=
4487 (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3);
4489 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC;
4490 if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >
4491 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
4492 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] =
4493 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
4500 *mbmi = best_mbmode;
4501 x->skip |= best_skip2;
4502 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
4503 for (i = 0; i < 4; i++)
4504 xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4506 for (i = 0; i < 4; ++i)
4507 vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
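// For sub-8x8 blocks the motion vectors of the last (bottom-right) sub-block
// are copied into mbmi so that neighbouring blocks have a single
// representative MV to use for MV prediction and context.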
4509 mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
4510 mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
4513 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
4514 if (best_pred_rd[i] == INT64_MAX)
4515 best_pred_diff[i] = INT_MIN;
4517 best_pred_diff[i] = best_rd - best_pred_rd[i];
4521 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4522 if (best_filter_rd[i] == INT64_MAX)
4523 best_filter_diff[i] = 0;
4525 best_filter_diff[i] = best_rd - best_filter_rd[i];
4527 if (cm->mcomp_filter_type == SWITCHABLE)
4528 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4530 vp9_zero(best_filter_diff);
4534 for (i = 0; i < TX_MODES; i++) {
4535 if (best_tx_rd[i] == INT64_MAX)
4536 best_tx_diff[i] = 0;
4538 best_tx_diff[i] = best_rd - best_tx_rd[i];
4541 vp9_zero(best_tx_diff);
4544 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
4546 store_coding_context(x, ctx, best_mode_index,
4547 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4548 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4549 mbmi->ref_frame[1]][0],
4550 best_pred_diff, best_tx_diff, best_filter_diff);