2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
16 #include "vp9/common/vp9_pragmas.h"
17 #include "vp9/encoder/vp9_tokenize.h"
18 #include "vp9/encoder/vp9_treewriter.h"
19 #include "vp9/encoder/vp9_onyx_int.h"
20 #include "vp9/encoder/vp9_modecosts.h"
21 #include "vp9/encoder/vp9_encodeintra.h"
22 #include "vp9/common/vp9_entropymode.h"
23 #include "vp9/common/vp9_reconinter.h"
24 #include "vp9/common/vp9_reconintra.h"
25 #include "vp9/common/vp9_findnearmv.h"
26 #include "vp9/common/vp9_quant_common.h"
27 #include "vp9/encoder/vp9_encodemb.h"
28 #include "vp9/encoder/vp9_quantize.h"
29 #include "vp9/encoder/vp9_variance.h"
30 #include "vp9/encoder/vp9_mcomp.h"
31 #include "vp9/encoder/vp9_rdopt.h"
32 #include "vp9/encoder/vp9_ratectrl.h"
33 #include "vpx_mem/vpx_mem.h"
34 #include "vp9/common/vp9_systemdependent.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/common/vp9_seg_common.h"
37 #include "vp9/common/vp9_pred_common.h"
38 #include "vp9/common/vp9_entropy.h"
40 #include "vp9/common/vp9_mvref_common.h"
41 #include "vp9/common/vp9_common.h"
43 #define INVALID_MV 0x80008000
45 /* Factor to weigh the rate for switchable interp filters */
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
48 DECLARE_ALIGNED(16, extern const uint8_t,
49 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
51 #define I4X4_PRED 0x8000
52 #define SPLITMV 0x10000
// Table of candidate coding modes. Each entry is a
// {prediction mode, first reference frame, second reference frame} triple.
// Entries whose second reference is NONE use only the first reference
// (or are intra when the first reference is INTRA_FRAME).
// NOTE(review): entries with ALTREF_FRAME as the second reference are
// presumably compound-prediction candidates, and the array order is
// presumably the order in which the mode search visits candidates --
// confirm against the code that consumes this table.
// I4X4_PRED and SPLITMV are the values #define'd in this file
// (0x8000 and 0x10000).
54 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
55 {NEARESTMV, LAST_FRAME, NONE},
56 {DC_PRED, INTRA_FRAME, NONE},
58 {NEARESTMV, ALTREF_FRAME, NONE},
59 {NEARESTMV, GOLDEN_FRAME, NONE},
60 {NEWMV, LAST_FRAME, NONE},
61 {NEARESTMV, LAST_FRAME, ALTREF_FRAME},
62 {NEARMV, LAST_FRAME, NONE},
63 {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
65 {NEWMV, GOLDEN_FRAME, NONE},
66 {NEWMV, ALTREF_FRAME, NONE},
67 {NEARMV, ALTREF_FRAME, NONE},
69 {TM_PRED, INTRA_FRAME, NONE},
71 {NEARMV, LAST_FRAME, ALTREF_FRAME},
72 {NEWMV, LAST_FRAME, ALTREF_FRAME},
73 {NEARMV, GOLDEN_FRAME, NONE},
74 {NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
75 {NEWMV, GOLDEN_FRAME, ALTREF_FRAME},
// Split-motion-vector candidates, one per reference configuration.
77 {SPLITMV, LAST_FRAME, NONE},
78 {SPLITMV, GOLDEN_FRAME, NONE},
79 {SPLITMV, ALTREF_FRAME, NONE},
80 {SPLITMV, LAST_FRAME, ALTREF_FRAME},
81 {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
// Zero-motion candidates, one per reference configuration.
83 {ZEROMV, LAST_FRAME, NONE},
84 {ZEROMV, GOLDEN_FRAME, NONE},
85 {ZEROMV, ALTREF_FRAME, NONE},
86 {ZEROMV, LAST_FRAME, ALTREF_FRAME},
87 {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
// Remaining intra candidates (4x4 intra and the directional predictors).
89 {I4X4_PRED, INTRA_FRAME, NONE},
90 {H_PRED, INTRA_FRAME, NONE},
91 {V_PRED, INTRA_FRAME, NONE},
92 {D135_PRED, INTRA_FRAME, NONE},
93 {D207_PRED, INTRA_FRAME, NONE},
94 {D153_PRED, INTRA_FRAME, NONE},
95 {D63_PRED, INTRA_FRAME, NONE},
96 {D117_PRED, INTRA_FRAME, NONE},
97 {D45_PRED, INTRA_FRAME, NONE},
100 // The baseline rd thresholds for breaking out of the rd loop for
101 // certain modes are assumed to be based on 8x8 blocks.
102 // This table is used to correct for block size.
103 // The factors here are scaled by 4, i.e. << 2 (2 = x0.5, 32 = x8 etc).
// There is one factor per BLOCK_SIZES entry. The threshold setup in
// vp9_initialize_rd_consts() divides by 4 to remove this scaling.
104 static int rd_thresh_block_size_factor[BLOCK_SIZES] =
105 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
107 #define MAX_RD_THRESH_FACT 64
108 #define RD_THRESH_INC 1
// Fills the coefficient token cost table `c` from the model probabilities
// `p`, for every transform size, block type, reference type, coefficient
// band and previous-coefficient context.
// For each combination the model probabilities are first expanded to a full
// probability set, then two cost vectors are written: index [0] via
// vp9_cost_tokens() and index [1] via vp9_cost_tokens_skip().
// NOTE(review): the loop-variable declarations and the trailing arguments of
// the two cost calls are not visible in this view.
110 static void fill_token_costs(vp9_coeff_cost *c,
111 vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
114 for (t = TX_4X4; t <= TX_32X32; t++)
115 for (i = 0; i < BLOCK_TYPES; i++)
116 for (j = 0; j < REF_TYPES; j++)
117 for (k = 0; k < COEF_BANDS; k++)
118 for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
119 vp9_prob probs[ENTROPY_NODES];
120 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
121 vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
123 vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
// Both variants must agree on the cost of the end-of-block token.
125 assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
126 c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
// Boost factors applied to RDMULT in vp9_initialize_rd_consts() on non-key
// frames in pass 2: RDMULT += (RDMULT * rd_iifactor[idx]) >> 4, where idx is
// cpi->twopass.next_iiratio (index 31 is used when the ratio exceeds 31).
// Only the first five entries are non-zero.
130 static const int rd_iifactor[32] = {
131 4, 4, 3, 2, 1, 0, 0, 0,
132 0, 0, 0, 0, 0, 0, 0, 0,
133 0, 0, 0, 0, 0, 0, 0, 0,
134 0, 0, 0, 0, 0, 0, 0, 0,
137 // 3* dc_qlookup[Q]*dc_qlookup[Q];
139 /* values are now correlated to quantizer */
// Per-qindex lookup tables, filled by vp9_init_me_luts() and read by
// vp9_initialize_me_consts() to set sadperbit16 / sadperbit4.
140 static int sad_per_bit16lut[QINDEX_RANGE];
141 static int sad_per_bit4lut[QINDEX_RANGE];
// Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
// index in [0, QINDEX_RANGE). Each entry is a linear function of the real
// quantizer value returned by vp9_convert_qindex_to_q(), truncated to int.
143 void vp9_init_me_luts() {
146 // Initialize the sad lut tables using a formulaic calculation for now
147 // This is to make it easier to resolve the impact of experimental changes
148 // to the quantizer tables.
149 for (i = 0; i < QINDEX_RANGE; i++) {
150 sad_per_bit16lut[i] =
151 (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
152 sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
// Returns the rate-distortion multiplier for a quantizer index:
// (11 * q * q) >> 2, i.e. 2.75 * q^2 rounded down, where q is the value
// returned by vp9_dc_quant(qindex, 0).
156 static int compute_rd_mult(int qindex) {
157 const int q = vp9_dc_quant(qindex, 0);
158 return (11 * q * q) >> 2;
// Copies the per-quantizer SAD-per-bit constants for `qindex` from the
// lookup tables into cpi->mb.
// NOTE(review): qindex is used directly as an array index with no visible
// range check; callers presumably pass a value in [0, QINDEX_RANGE).
161 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
162 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
163 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
// Sets up the rate-distortion constants on `cpi` for quantizer index
// `qindex`: RDMULT, errorperbit, the per-block-size / per-mode rd
// thresholds, and the token, partition, mode and motion-vector cost tables.
// NOTE(review): several statements of this function (local declarations,
// an `else`, RDDIV handling) are not visible in this view.
167 void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
170 vp9_clear_system_state(); // __asm emms;
172 // Further tests required to see if optimum is different
173 // for key frames, golden frames and arf frames.
174 // if (cpi->common.refresh_golden_frame ||
175 // cpi->common.refresh_alt_ref_frame)
// Keep the index inside the valid quantizer range.
176 qindex = clamp(qindex, 0, MAXQ);
178 cpi->RDMULT = compute_rd_mult(qindex);
// Second pass, non-key frames: boost RDMULT by rd_iifactor[] / 16, indexed
// by next_iiratio (index 31 is used when the ratio exceeds 31).
179 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
180 if (cpi->twopass.next_iiratio > 31)
181 cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
184 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
// errorperbit is RDMULT / 64, forced to be at least 1.
186 cpi->mb.errorperbit = cpi->RDMULT >> 6;
187 cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
189 vp9_set_speed_features(cpi);
// q scales the thresholds: (dc quantizer / 4) raised to the power 1.25.
// NOTE(review): q is used as a divisor below; if no clamp exists in the
// lines not visible here, a small quantizer could make it zero -- confirm.
191 q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
// NOTE(review): the first threshold loop (divisor 4 * 100) presumably belongs
// to this branch and the second loop (divisor 4) to its else -- confirm.
196 if (cpi->RDMULT > 1000) {
200 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
201 for (i = 0; i < MAX_MODES; ++i) {
202 // Threshold here seems unnecessarily harsh but fine given actual
203 // range of values used for cpi->sf.thresh_mult[]
204 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
206 // *4 relates to the scaling of rd_thresh_block_size_factor[]
207 if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
208 cpi->rd_threshes[bsize][i] =
209 cpi->sf.thresh_mult[i] * q *
210 rd_thresh_block_size_factor[bsize] / (4 * 100);
// Multiplier too large for the product to fit in an int: saturate.
212 cpi->rd_threshes[bsize][i] = INT_MAX;
219 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
220 for (i = 0; i < MAX_MODES; i++) {
221 // Threshold here seems unnecessarily harsh but fine given actual
222 // range of values used for cpi->sf.thresh_mult[]
223 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
225 if (cpi->sf.thresh_mult[i] < thresh_max) {
226 cpi->rd_threshes[bsize][i] =
227 cpi->sf.thresh_mult[i] * q *
228 rd_thresh_block_size_factor[bsize] / 4;
230 cpi->rd_threshes[bsize][i] = INT_MAX;
// Coefficient token costs from the current frame-context probabilities.
236 fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);
// Partition costs, one vector per partition context, using the
// probabilities for the current frame type.
238 for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
239 vp9_cost_tokens(cpi->mb.partition_cost[i],
240 cpi->common.fc.partition_prob[cpi->common.frame_type][i],
243 /*rough estimate for costing*/
244 vp9_init_mode_costs(cpi);
// Motion-vector and inter-mode costs are only built for non-key frames.
246 if (cpi->common.frame_type != KEY_FRAME) {
247 vp9_build_nmv_cost_table(
248 cpi->mb.nmvjointcost,
249 cpi->mb.e_mbd.allow_high_precision_mv ?
250 cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
251 &cpi->common.fc.nmvc,
252 cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
254 for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
255 MB_PREDICTION_MODE m;
// Cost of each inter mode from NEARESTMV upwards, stored relative to
// NEARESTMV.
257 for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
258 cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
259 cost_token(vp9_inter_mode_tree,
260 cpi->common.fc.inter_mode_probs[i],
261 vp9_inter_mode_encodings - NEARESTMV + m);
// Looks up two tables of `ntab` entries at the same fractional position and
// writes the results to *v1 and *v2. The position is y = x * inv_step.
// One branch returns the last entry of each table; the other blends entries
// d and d + 1 with weights (1 - a) and a.
// NOTE(review): the definitions of `d` and `a` and the branch condition are
// not visible in this view; presumably d is the integer part of y, a the
// fractional part, and the last-entry branch handles y beyond the table.
266 static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
267 const double *tab1, const double *tab2,
268 double *v1, double *v2) {
269 double y = x * inv_step;
272 *v1 = tab1[ntab - 1];
273 *v2 = tab2[ntab - 1];
276 *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
277 *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
// Evaluates the normalized rate (*R) and normalized distortion (*D) models
// at `x` by linear interpolation of two tabulated curves.
// Both tables hold 120 samples (15 rows of 8) and are passed to
// linear_interpolate2() with inv_tab_step = 8, i.e. eight samples per unit
// of x.
281 static void model_rd_norm(double x, double *R, double *D) {
282 static const int inv_tab_step = 8;
283 static const int tab_size = 120;
284 // NOTE: The tables below must be of the same size
287 // This table models the rate for a Laplacian
288 // source with given variance when quantized with a uniform quantizer
289 // with given stepsize. The closed form expression is:
290 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
291 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
292 // and H(x) is the binary entropy function.
293 static const double rate_tab[] = {
294 64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
295 2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
296 1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
297 0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
298 0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
299 0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
300 0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
301 0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
302 0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
303 0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
304 0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
305 0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
306 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
307 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
308 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
310 // Normalized distortion
311 // This table models the normalized distortion for a Laplacian
312 // source with given variance when quantized with a uniform quantizer
313 // with given stepsize. The closed form expression is:
314 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
315 // where x = qpstep / sqrt(variance)
316 // Note the actual distortion is Dn * variance.
317 static const double dist_tab[] = {
318 0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
319 0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
320 0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
321 0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
322 0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
323 0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
324 0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
325 0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
326 0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
327 0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
328 0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
329 0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
330 0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
331 0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
332 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
// NOTE(review): the first two asserts below have unbalanced parentheses and
// cannot compile as written; they are presumably inside a disabled
// (commented-out) region whose delimiters are not visible here -- confirm
// before enabling them.
335 assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]);
336 assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]);
337 assert(sizeof(rate_tab) == sizeof(dist_tab));
340 linear_interpolate2(x, tab_size, inv_tab_step,
341 rate_tab, dist_tab, R, D);
// Estimates rate (*rate) and distortion (*dist) for a block of `n` samples
// with summed squared error `var`, quantized with step `qstep`.
// In the general case: s2 = var / n is the per-sample variance,
// x = qstep / sqrt(s2) is the normalized step, and model_rd_norm() supplies
// the normalized rate R and distortion D. Then
//   *rate = (n << 8) * R, rounded, and *dist = var * D, rounded.
// NOTE(review): the body of the `var == 0 || n == 0` branch is not visible
// in this view; presumably it reports zero rate and distortion.
344 static void model_rd_from_var_lapndz(int var, int n, int qstep,
345 int *rate, int64_t *dist) {
346 // This function models the rate and distortion for a Laplacian
347 // source with given variance when quantized with a uniform quantizer
348 // with given stepsize. The closed form expressions are in:
349 // Hang and Chen, "Source Model for transform video coder and its
350 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
351 // Sys. for Video Tech., April 1997.
352 vp9_clear_system_state();
// Guard the divisions below against a zero variance or zero sample count.
353 if (var == 0 || n == 0) {
358 double s2 = (double) var / n;
359 double x = qstep / sqrt(s2);
360 model_rd_norm(x, &R, &D);
// Adding 0.5 before the implicit conversion rounds to nearest.
361 *rate = ((n << 8) * R + 0.5);
362 *dist = (var * D + 0.5);
364 vp9_clear_system_state();
// Model-based rate/distortion estimate for a whole block across all planes.
// For each plane, the sum of squared errors between source and prediction
// is obtained through the variance function for that plane's block size and
// fed to model_rd_from_var_lapndz(). Results are written to *out_rate_sum
// and *out_dist_sum (distortion scaled by 16).
// NOTE(review): the per-plane accumulation statements are not visible here.
// NOTE(review): dist_sum is declared int, so `dist_sum << 4` is evaluated in
// int before widening to int64_t; model_rd_for_sb_y_tx() uses an int64_t
// accumulator for the same computation. Consider whether int is wide enough.
367 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
368 MACROBLOCK *x, MACROBLOCKD *xd,
369 int *out_rate_sum, int64_t *out_dist_sum) {
370 // Note our transform coeffs are 8 times an orthogonal transform.
371 // Hence quantizer step is also 8 times. To get effective quantizer
372 // we need to divide by 8 before sending to modeling function.
373 int i, rate_sum = 0, dist_sum = 0;
375 for (i = 0; i < MAX_MB_PLANE; ++i) {
376 struct macroblock_plane *const p = &x->plane[i];
377 struct macroblockd_plane *const pd = &xd->plane[i];
378 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
// Only the sse output is used; the variance return value is discarded.
382 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
383 pd->dst.buf, pd->dst.stride, &sse);
384 // sse works better than var, since there is no dc prediction used
// `>> 3` is the divide-by-8 described at the top of the function.
385 model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
386 pd->dequant[1] >> 3, &rate, &dist);
392 *out_rate_sum = rate_sum;
393 *out_dist_sum = dist_sum << 4;
// Model-based rate/distortion estimate for the luma plane (plane 0) of a
// block at a specific transform size. The block is walked in tiles of
// t x t pixels (t = 4 << tx_size); each tile's sse is modelled with
// model_rd_from_var_lapndz(). *out_skip stays set only if every tile's
// modelled rate is below 1024.
// NOTE(review): the tx_size / out_skip parameters, the assignments that pick
// the variance block size `bs`, and the accumulation statements are not
// visible in this view.
396 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
398 MACROBLOCK *x, MACROBLOCKD *xd,
399 int *out_rate_sum, int64_t *out_dist_sum,
403 struct macroblock_plane *const p = &x->plane[0];
404 struct macroblockd_plane *const pd = &xd->plane[0];
// Block dimensions in pixels.
405 const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
406 const int height = 4 << num_4x4_blocks_high_lookup[bsize];
408 int64_t dist_sum = 0;
// Transform edge length in pixels: 4, 8, 16 or 32.
409 const int t = 4 << tx_size;
411 if (tx_size == TX_4X4) {
413 } else if (tx_size == TX_8X8) {
415 } else if (tx_size == TX_16X16) {
417 } else if (tx_size == TX_32X32) {
424 for (j = 0; j < height; j += t) {
425 for (k = 0; k < width; k += t) {
429 cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
430 &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
432 // sse works better than var, since there is no dc prediction used
433 model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
436 *out_skip &= (rate < 1024);
440 *out_rate_sum = rate_sum;
441 *out_dist_sum = dist_sum << 4;
// C implementation of the block error measure. Over `block_size`
// coefficients it accumulates
//   error   = sum of (coeff[i] - dqcoeff[i])^2
//   sqcoeff = sum of coeff[i]^2
// Each product is formed in unsigned arithmetic before being added to the
// 64-bit accumulators.
// NOTE(review): the statements that store to *ssz and return are not
// visible in this view; presumably *ssz receives sqcoeff and the function
// returns error.
444 int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
445 intptr_t block_size, int64_t *ssz) {
447 int64_t error = 0, sqcoeff = 0;
449 for (i = 0; i < block_size; i++) {
450 int this_diff = coeff[i] - dqcoeff[i];
451 error += (unsigned)this_diff * this_diff;
452 sqcoeff += (unsigned) coeff[i] * coeff[i];
459 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
460 * decide whether to include cost of a trailing EOB node or not (i.e. we
461 * can skip this if the last coefficient in this transform block, e.g. the
462 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * is non-zero). */
// Number of coefficients in each coefficient band, one row per transform
// size. The six counts in a row sum to the number of coefficients of that
// transform (16, 64, 256, 1024). cost_coeffs() starts reading each row at
// index 1.
464 static const int16_t band_counts[TX_SIZES][8] = {
465 { 1, 2, 3, 4, 3, 16 - 13, 0 },
466 { 1, 2, 3, 4, 11, 64 - 21, 0 },
467 { 1, 2, 3, 4, 11, 256 - 21, 0 },
468 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
// Computes the token cost of the quantized coefficients of one transform
// block (`block` in `plane`), given the above/left entropy contexts *A / *L,
// the scan order `scan` and its neighbor table `nb`.
// The cost table is selected by transform size, plane type and whether the
// block is inter-coded. Coefficients are visited in scan order; each adds
// the cost of its token in the current context plus the extra-bits cost of
// its value.
// NOTE(review): the tx_size parameter, the declarations of `cost` and `c`,
// the band bookkeeping, the conditions around the EOB cost, the context
// update and the return statement are not visible in this view.
471 static INLINE int cost_coeffs(MACROBLOCK *mb,
472 int plane, int block,
473 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
475 const int16_t *scan, const int16_t *nb) {
476 MACROBLOCKD *const xd = &mb->e_mbd;
477 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
478 struct macroblockd_plane *pd = &xd->plane[plane];
479 const PLANE_TYPE type = pd->plane_type;
// Start at index 1; band_left is loaded from this pointer further down.
480 const int16_t *band_count = &band_counts[tx_size][1];
481 const int eob = pd->eobs[block];
482 const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
// ref is 1 for inter blocks and 0 for intra blocks.
483 const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
484 unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
485 mb->token_costs[tx_size][type][ref];
// Contexts are reduced to 0 / 1 before being combined.
486 const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
487 uint8_t token_cache[1024];
488 int pt = combine_entropy_contexts(above_ec, left_ec);
491 // Check for consistency of tx_size with mode info
492 assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
493 : get_uv_tx_size(mbmi) == tx_size);
// Cost of coding only an end-of-block token in the first band.
497 cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
500 int band_left = *band_count++;
// First coefficient: uses the combined above/left context directly.
503 int v = qcoeff_ptr[0];
504 int prev_t = vp9_dct_value_tokens_ptr[v].token;
505 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
506 token_cache[0] = vp9_pt_energy_class[prev_t];
// Remaining coefficients: the context comes from already-coded neighbors,
// and the second table index is !prev_t.
510 for (c = 1; c < eob; c++) {
511 const int rc = scan[c];
515 t = vp9_dct_value_tokens_ptr[v].token;
516 pt = get_coef_context(nb, token_cache, c);
517 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
518 token_cache[rc] = vp9_pt_energy_class[t];
521 band_left = *band_count++;
// Cost of the end-of-block token that follows the last coefficient.
528 pt = get_coef_context(nb, token_cache, c);
529 cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
533 // is eob first coefficient;
// Per-plane state passed through the block visitors dist_block(),
// rate_block() and block_yrd_txfm().
// NOTE(review): only some members are visible in this view. The code in this
// file also accesses x, tx_size, rate, dist, sse, best_rd and skip through
// this struct.
539 struct rdcost_block_args {
// Working copies of the above / left entropy contexts.
541 ENTROPY_CONTEXT t_above[16];
542 ENTROPY_CONTEXT t_left[16];
// Scan order and matching neighbor table for the current transform.
551 const int16_t *scan, *nb;
// Accumulates distortion and sse for one transform block into `arg`
// (a struct rdcost_block_args). The coefficient count passed to
// vp9_block_error() is 16 << (2 * tx_size), i.e. 16, 64, 256 or 1024.
// The sse contribution is shifted right by 2 unless the transform is 32x32.
// NOTE(review): the trailing arguments of vp9_block_error(), any shift on
// its result, and the tail of the skip_encode branch are not visible here.
// NOTE(review): `shift` is derived from args->tx_size while the block size
// is derived from the tx_size parameter -- presumably they are always
// equal; confirm.
554 static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
555 const int ss_txfrm_size = tx_size << 1;
556 struct rdcost_block_args* args = arg;
557 MACROBLOCK* const x = args->x;
558 MACROBLOCKD* const xd = &x->e_mbd;
559 struct macroblock_plane *const p = &x->plane[plane];
560 struct macroblockd_plane *const pd = &xd->plane[plane];
562 int shift = args->tx_size == TX_32X32 ? 0 : 2;
563 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
564 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
565 args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
567 args->sse += this_sse >> shift;
569 if (x->skip_encode &&
570 xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
571 // TODO(jingning): tune the model to better capture the distortion.
// NOTE(review): this int64_t `p` shadows the plane pointer `p` declared at
// the top of the function.
572 int64_t p = (pd->dequant[1] * pd->dequant[1] *
573 (1 << ss_txfrm_size)) >> shift;
// Adds the coefficient token cost of one transform block to args->rate.
// The block index is converted to raster x / y offsets, which select the
// entries of the working above / left context arrays passed to
// cost_coeffs().
// NOTE(review): the tx_size parameter is unused in the visible lines;
// args->tx_size is used instead.
579 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
580 TX_SIZE tx_size, void *arg) {
581 struct rdcost_block_args* args = arg;
584 txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
586 args->rate += cost_coeffs(args->x, plane, block,
587 args->t_above + x_idx,
588 args->t_left + y_idx, args->tx_size,
589 args->scan, args->nb);
// Per-transform-block visitor used by txfm_rd_in_plane().
// It compares an rd cost against args->best_rd and, when the budget is
// exceeded, marks rate / dist / sse as invalid (INT_MAX / INT64_MAX).
// Otherwise it transforms and quantizes the block (intra blocks go through
// vp9_encode_block_intra(), others through vp9_xform_quant()) and then
// accumulates distortion and rate.
// NOTE(review): the computation of `rd` from rd1 / rd2, the skip flag
// handling and the early return are not visible in this view.
592 static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
593 TX_SIZE tx_size, void *arg) {
594 struct rdcost_block_args *args = arg;
595 MACROBLOCK *const x = args->x;
596 MACROBLOCKD *const xd = &x->e_mbd;
597 struct encode_b_args encode_args = {x, NULL};
598 int64_t rd1, rd2, rd;
// rd1: cost of coding the coefficients accumulated so far.
// rd2: cost with zero rate and the accumulated sse as distortion.
602 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
603 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
605 if (rd > args->best_rd) {
607 args->rate = INT_MAX;
608 args->dist = INT64_MAX;
609 args->sse = INT64_MAX;
613 if (!is_inter_block(&xd->mode_info_context->mbmi))
614 vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
616 vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
618 dist_block(plane, block, tx_size, args);
619 rate_block(plane, block, plane_bsize, tx_size, args);
// Computes rate, distortion, sse and skippability for one plane of a block
// at transform size `tx_size`, giving up once the cost exceeds ref_best_rd.
// It sets mbmi.tx_size, prepares working entropy contexts and the scan
// tables for the transform size, then visits every transform block with
// block_yrd_txfm().
// NOTE(review): the statement that selects among the four context setups
// (presumably a switch on tx_size) and the stores to *rate and *sse are not
// visible in this view.
622 static void txfm_rd_in_plane(MACROBLOCK *x,
623 int *rate, int64_t *distortion,
624 int *skippable, int64_t *sse,
625 int64_t ref_best_rd, int plane,
626 BLOCK_SIZE bsize, TX_SIZE tx_size) {
627 MACROBLOCKD *const xd = &x->e_mbd;
628 struct macroblockd_plane *const pd = &xd->plane[plane];
629 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
630 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
631 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
633 struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size,
634 num_4x4_blocks_wide, num_4x4_blocks_high,
635 0, 0, 0, ref_best_rd, 0 };
637 xd->mode_info_context->mbmi.tx_size = tx_size;
// 4x4 transforms: contexts are copied one entry per 4x4 column / row.
641 vpx_memcpy(&args.t_above, pd->above_context,
642 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
643 vpx_memcpy(&args.t_left, pd->left_context,
644 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
645 get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0),
646 &args.scan, &args.nb);
// 8x8 transforms: two adjacent context entries are merged into one flag.
// NOTE(review): here and in the two cases below, context entries are read
// through a wider integer pointer. This assumes ENTROPY_CONTEXT is one byte
// and the arrays are suitably aligned -- confirm; it also relies on the
// compiler tolerating this form of type punning.
649 for (i = 0; i < num_4x4_blocks_wide; i += 2)
650 args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
651 for (i = 0; i < num_4x4_blocks_high; i += 2)
652 args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
653 get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd),
654 &args.scan, &args.nb);
// 16x16 transforms: four context entries per flag.
657 for (i = 0; i < num_4x4_blocks_wide; i += 4)
658 args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
659 for (i = 0; i < num_4x4_blocks_high; i += 4)
660 args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
661 get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd),
662 &args.scan, &args.nb);
// 32x32 transforms: eight context entries per flag; always the default scan.
665 for (i = 0; i < num_4x4_blocks_wide; i += 8)
666 args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
667 for (i = 0; i < num_4x4_blocks_high; i += 8)
668 args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
669 args.scan = vp9_default_scan_32x32;
670 args.nb = vp9_default_scan_32x32_neighbors;
676 foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args);
677 *distortion = args.dist;
// Skippable only if the plane has no coefficients and the visitor did not
// set args.skip.
680 *skippable = vp9_is_skippable_in_plane(xd, bsize, plane) && (!args.skip);
// Picks the largest transform size permitted by both the block size and the
// frame's tx_mode, without comparing rd costs, and then measures
// rate / distortion / skip / sse for that size with txfm_rd_in_plane().
// The sse is stored in the slot of `sse` indexed by the chosen size.
// NOTE(review): the ref_best_rd and bs parameters, the final `else`, and
// the last argument of the txfm_rd_in_plane() call are not visible here.
683 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
684 int *rate, int64_t *distortion,
685 int *skip, int64_t *sse,
688 const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
689 VP9_COMMON *const cm = &cpi->common;
690 MACROBLOCKD *const xd = &x->e_mbd;
691 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
692 if (max_txfm_size == TX_32X32 &&
693 (cm->tx_mode == ALLOW_32X32 ||
694 cm->tx_mode == TX_MODE_SELECT)) {
695 mbmi->tx_size = TX_32X32;
696 } else if (max_txfm_size >= TX_16X16 &&
697 (cm->tx_mode == ALLOW_16X16 ||
698 cm->tx_mode == ALLOW_32X32 ||
699 cm->tx_mode == TX_MODE_SELECT)) {
700 mbmi->tx_size = TX_16X16;
701 } else if (cm->tx_mode != ONLY_4X4) {
702 mbmi->tx_size = TX_8X8;
704 mbmi->tx_size = TX_4X4;
706 txfm_rd_in_plane(x, rate, distortion, skip,
707 &sse[mbmi->tx_size], ref_best_rd, 0, bs,
// The largest permitted size was taken, so the step-down count is 0.
709 cpi->txfm_stepdown_count[0]++;
// Chooses the luma transform size from measured rd costs.
// Inputs are per-size rates r[n][0], distortions d[n] and skip flags s[n].
// The function derives r[n][1] (adding the cost of signalling the size),
// builds rd[n][0] / rd[n][1], sets mbmi->tx_size according to tx_mode,
// reports the chosen size's rate / distortion / skip, fills tx_cache[] with
// the best cost under each tx_mode, and updates the step-down statistics.
// NOTE(review): the `s`, `skip` and `bs` parameters, several branch
// conditions and the initial value of r[n][1] are not visible in this view.
// NOTE(review): rd[] rows are only filled for n <= max_tx_size in the
// visible loops, yet rd[TX_8X8] is read unconditionally further down;
// presumably callers never pass a block whose maximum is TX_4X4 -- confirm.
712 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
713 int (*r)[2], int *rate,
714 int64_t *d, int64_t *distortion,
716 int64_t tx_cache[TX_MODES],
718 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
719 VP9_COMMON *const cm = &cpi->common;
720 MACROBLOCKD *const xd = &x->e_mbd;
721 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
722 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
723 int64_t rd[TX_SIZES][2];
727 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
// Add the cost of signalling transform size n: one bit per tree level, with
// no terminating bit when n is the largest allowed size.
729 for (n = TX_4X4; n <= max_tx_size; n++) {
731 if (r[n][0] == INT_MAX)
733 for (m = 0; m <= n - (n == max_tx_size); m++) {
735 r[n][1] += vp9_cost_zero(tx_probs[m]);
737 r[n][1] += vp9_cost_one(tx_probs[m]);
// s0 / s1: cost of coding the skip flag as 0 / 1.
741 assert(skip_prob > 0);
742 s0 = vp9_cost_bit(skip_prob, 0);
743 s1 = vp9_cost_bit(skip_prob, 1);
745 for (n = TX_4X4; n <= max_tx_size; n++) {
// A size whose measurement was abandoned keeps an "infinite" cost.
746 if (d[n] == INT64_MAX) {
747 rd[n][0] = rd[n][1] = INT64_MAX;
751 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
753 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
754 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
// Select the size: forced by tx_mode, or under TX_MODE_SELECT the size with
// the strictly lowest rd[.][1].
758 if (max_tx_size == TX_32X32 &&
759 (cm->tx_mode == ALLOW_32X32 ||
760 (cm->tx_mode == TX_MODE_SELECT &&
761 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
762 rd[TX_32X32][1] < rd[TX_4X4][1]))) {
763 mbmi->tx_size = TX_32X32;
764 } else if (max_tx_size >= TX_16X16 &&
765 (cm->tx_mode == ALLOW_16X16 ||
766 cm->tx_mode == ALLOW_32X32 ||
767 (cm->tx_mode == TX_MODE_SELECT &&
768 rd[TX_16X16][1] < rd[TX_8X8][1] &&
769 rd[TX_16X16][1] < rd[TX_4X4][1]))) {
770 mbmi->tx_size = TX_16X16;
771 } else if (cm->tx_mode == ALLOW_8X8 ||
772 cm->tx_mode == ALLOW_16X16 ||
773 cm->tx_mode == ALLOW_32X32 ||
774 (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
775 mbmi->tx_size = TX_8X8;
777 mbmi->tx_size = TX_4X4;
// Report the chosen size. The rate includes the size-signalling cost only
// under TX_MODE_SELECT.
780 *distortion = d[mbmi->tx_size];
781 *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
782 *skip = s[mbmi->tx_size];
// Best achievable cost under each tx_mode, for the caller's mode statistics.
784 tx_cache[ONLY_4X4] = rd[TX_4X4][0];
785 tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
786 tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
787 tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
788 if (max_tx_size == TX_32X32 &&
789 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
790 rd[TX_32X32][1] < rd[TX_4X4][1])
791 tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
792 else if (max_tx_size >= TX_16X16 &&
793 rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
794 tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
796 tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
797 rd[TX_4X4][1] : rd[TX_8X8][1];
// Statistics: count how many sizes below the maximum the rd-best size is.
799 if (max_tx_size == TX_32X32 &&
800 rd[TX_32X32][1] < rd[TX_16X16][1] &&
801 rd[TX_32X32][1] < rd[TX_8X8][1] &&
802 rd[TX_32X32][1] < rd[TX_4X4][1]) {
803 cpi->txfm_stepdown_count[0]++;
804 } else if (max_tx_size >= TX_16X16 &&
805 rd[TX_16X16][1] < rd[TX_8X8][1] &&
806 rd[TX_16X16][1] < rd[TX_4X4][1]) {
807 cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
808 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
809 cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
811 cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
// Chooses the luma transform size from model-estimated per-size
// rates r[n][0], distortions d[n] and skip flags s[n]. It builds rd[n][.]
// as in choose_txfm_size_from_rd(), scales each by scale_rd[n], picks the
// size (ties go to the larger size, since comparisons use <=), and then
// measures the actual rate / distortion / skip / sse for the chosen size
// with txfm_rd_in_plane().
// NOTE(review): the ref_best_rd and bs parameters, several branch
// conditions and the initial value of r[n][1] are not visible in this view.
815 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
816 int (*r)[2], int *rate,
817 int64_t *d, int64_t *distortion,
818 int *s, int *skip, int64_t *sse,
821 const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
822 VP9_COMMON *const cm = &cpi->common;
823 MACROBLOCKD *const xd = &x->e_mbd;
824 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
825 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
826 int64_t rd[TX_SIZES][2];
// Per-size multipliers applied to the modelled rd cost; smaller transforms
// get larger multipliers.
829 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
830 // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
832 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
834 // for (n = TX_4X4; n <= max_txfm_size; n++)
835 // r[n][0] = (r[n][0] * scale_r[n]);
// Add the cost of signalling transform size n.
837 for (n = TX_4X4; n <= max_txfm_size; n++) {
839 for (m = 0; m <= n - (n == max_txfm_size); m++) {
841 r[n][1] += vp9_cost_zero(tx_probs[m]);
843 r[n][1] += vp9_cost_one(tx_probs[m]);
// s0 / s1: cost of coding the skip flag as 0 / 1.
847 assert(skip_prob > 0);
848 s0 = vp9_cost_bit(skip_prob, 0);
849 s1 = vp9_cost_bit(skip_prob, 1);
851 for (n = TX_4X4; n <= max_txfm_size; n++) {
853 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
855 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
856 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
// The product is computed in double and converted back to int64_t.
859 for (n = TX_4X4; n <= max_txfm_size; n++) {
860 rd[n][0] = (scale_rd[n] * rd[n][0]);
861 rd[n][1] = (scale_rd[n] * rd[n][1]);
864 if (max_txfm_size == TX_32X32 &&
865 (cm->tx_mode == ALLOW_32X32 ||
866 (cm->tx_mode == TX_MODE_SELECT &&
867 rd[TX_32X32][1] <= rd[TX_16X16][1] &&
868 rd[TX_32X32][1] <= rd[TX_8X8][1] &&
869 rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
870 mbmi->tx_size = TX_32X32;
871 } else if (max_txfm_size >= TX_16X16 &&
872 (cm->tx_mode == ALLOW_16X16 ||
873 cm->tx_mode == ALLOW_32X32 ||
874 (cm->tx_mode == TX_MODE_SELECT &&
875 rd[TX_16X16][1] <= rd[TX_8X8][1] &&
876 rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
877 mbmi->tx_size = TX_16X16;
878 } else if (cm->tx_mode == ALLOW_8X8 ||
879 cm->tx_mode == ALLOW_16X16 ||
880 cm->tx_mode == ALLOW_32X32 ||
881 (cm->tx_mode == TX_MODE_SELECT &&
882 rd[TX_8X8][1] <= rd[TX_4X4][1])) {
883 mbmi->tx_size = TX_8X8;
885 mbmi->tx_size = TX_4X4;
888 // Actually encode using the chosen mode if a model was used, but do not
889 // update the r, d costs
890 txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size],
891 ref_best_rd, 0, bs, mbmi->tx_size);
// Statistics: count how many sizes below the maximum the model-best size is.
893 if (max_txfm_size == TX_32X32 &&
894 rd[TX_32X32][1] <= rd[TX_16X16][1] &&
895 rd[TX_32X32][1] <= rd[TX_8X8][1] &&
896 rd[TX_32X32][1] <= rd[TX_4X4][1]) {
897 cpi->txfm_stepdown_count[0]++;
898 } else if (max_txfm_size >= TX_16X16 &&
899 rd[TX_16X16][1] <= rd[TX_8X8][1] &&
900 rd[TX_16X16][1] <= rd[TX_4X4][1]) {
901 cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
902 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
903 cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
905 cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
// Luma rate-distortion for a block: chooses the transform size and reports
// rate, distortion, skip flag and sse (*psse) for it.
// Strategy depends on cpi->sf.tx_size_search_method:
//  - USE_LARGESTALL, or an intra block under any method other than
//    USE_FULL_RD: take the largest permitted size and zero txfm_cache.
//  - USE_LARGESTINTRA_MODELINTER on an inter block: estimate each size with
//    the rd model, then measure only the chosen one.
//  - otherwise: measure every candidate size and compare rd costs.
// NOTE(review): some statements (returns, `else` lines, trailing call
// arguments, any condition around the 8x8 / 4x4 model calls) are not
// visible in this view.
909 static void super_block_yrd(VP9_COMP *cpi,
910 MACROBLOCK *x, int *rate, int64_t *distortion,
911 int *skip, int64_t *psse, BLOCK_SIZE bs,
912 int64_t txfm_cache[TX_MODES],
913 int64_t ref_best_rd) {
// Per-transform-size scratch results.
914 int r[TX_SIZES][2], s[TX_SIZES];
915 int64_t d[TX_SIZES], sse[TX_SIZES];
916 MACROBLOCKD *xd = &x->e_mbd;
917 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
919 assert(bs == mbmi->sb_type);
// For inter blocks the luma residual is computed here.
920 if (mbmi->ref_frame[0] > INTRA_FRAME)
921 vp9_subtract_sby(x, bs);
923 if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
924 (cpi->sf.tx_size_search_method != USE_FULL_RD &&
925 mbmi->ref_frame[0] == INTRA_FRAME)) {
926 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
927 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
930 *psse = sse[mbmi->tx_size];
934 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
935 mbmi->ref_frame[0] > INTRA_FRAME) {
// Larger transform sizes are only tried on blocks big enough for them.
936 if (bs >= BLOCK_32X32)
937 model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
938 &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
939 if (bs >= BLOCK_16X16)
940 model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
941 &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
943 model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
944 &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
946 model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
947 &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
949 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
950 skip, sse, ref_best_rd, bs);
// Full search: measure each candidate size.
952 if (bs >= BLOCK_32X32)
953 txfm_rd_in_plane(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
954 &sse[TX_32X32], ref_best_rd, 0, bs, TX_32X32);
955 if (bs >= BLOCK_16X16)
956 txfm_rd_in_plane(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
957 &sse[TX_16X16], ref_best_rd, 0, bs, TX_16X16);
958 txfm_rd_in_plane(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
959 &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
960 txfm_rd_in_plane(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
961 &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
962 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
963 skip, txfm_cache, bs);
966 *psse = sse[mbmi->tx_size];
// Tests whether an oblique intra `mode` is unrelated to the best intra mode
// found so far. Each oblique mode is paired with two "neighboring" modes:
//   D117 <-> V, D135      D63  <-> V, D45
//   D207 <-> H, D45       D153 <-> H, D135
// A condition matches when best_intra_mode is neither of the two neighbors.
// NOTE(review): the return statements are not visible in this view;
// presumably a match returns 1 (skip this mode) and the fall-through
// returns 0.
969 static int conditional_skipintra(MB_PREDICTION_MODE mode,
970 MB_PREDICTION_MODE best_intra_mode) {
971 if (mode == D117_PRED &&
972 best_intra_mode != V_PRED &&
973 best_intra_mode != D135_PRED)
975 if (mode == D63_PRED &&
976 best_intra_mode != V_PRED &&
977 best_intra_mode != D45_PRED)
979 if (mode == D207_PRED &&
980 best_intra_mode != H_PRED &&
981 best_intra_mode != D45_PRED)
983 if (mode == D153_PRED &&
984 best_intra_mode != H_PRED &&
985 best_intra_mode != D135_PRED)
// Searches the intra prediction modes DC_PRED..TM_PRED for sub-block `ib`
// of an 8x8 block using 4x4 transforms, and keeps the mode with the lowest
// rd cost below `rd_thresh`.
// For each candidate mode, every 4x4 unit covered by `bsize` is predicted,
// the residual is transformed and quantized, and the coefficient rate and
// distortion are accumulated; a mode is abandoned as soon as its partial
// cost reaches the best so far. For the winning mode the entropy contexts
// *a / *l are updated and the reconstructed pixels are saved, then written
// back to the destination at the end.
// NOTE(review): one parameter, several local declarations, the `continue`
// and `goto` targets, and the stores to *best_mode / *bestrate / *bestratey
// are not visible in this view.
990 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
991 MB_PREDICTION_MODE *best_mode,
993 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
994 int *bestrate, int *bestratey,
995 int64_t *bestdistortion,
996 BLOCK_SIZE bsize, int64_t rd_thresh) {
997 MB_PREDICTION_MODE mode;
998 MACROBLOCKD *xd = &x->e_mbd;
// A candidate must beat the caller's threshold to be accepted.
999 int64_t best_rd = rd_thresh;
1002 struct macroblock_plane *p = &x->plane[0];
1003 struct macroblockd_plane *pd = &xd->plane[0];
1004 const int src_stride = p->src.stride;
1005 const int dst_stride = pd->dst.stride;
1006 uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
1007 p->src.buf, src_stride);
1008 uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
1009 pd->dst.buf, dst_stride);
1010 int16_t *src_diff, *coeff;
// ta / tl: contexts on entry. tempa / templ: per-mode working copies.
1012 ENTROPY_CONTEXT ta[2], tempa[2];
1013 ENTROPY_CONTEXT tl[2], templ[2];
1014 TX_TYPE tx_type = DCT_DCT;
1015 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1016 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1017 int idx, idy, block;
// Reconstruction of the best mode so far, stored with a row stride of 8.
1018 uint8_t best_dst[8 * 8];
1022 vpx_memcpy(ta, a, sizeof(ta));
1023 vpx_memcpy(tl, l, sizeof(tl));
1024 xd->mode_info_context->mbmi.tx_size = TX_4X4;
1026 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
// Modes disabled by the speed-feature mask are not evaluated.
1030 if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
1033 // Only do the oblique modes if the best so far is
1034 // one of the neighboring directional modes
1035 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1036 if (conditional_skipintra(mode, *best_mode))
// Start from the cost of signalling this mode.
1040 rate = bmode_costs[mode];
1043 vpx_memcpy(tempa, ta, sizeof(ta));
1044 vpx_memcpy(templ, tl, sizeof(tl));
1046 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1047 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1049 const int16_t *scan;
1050 uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
1051 uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
// Index of this 4x4 unit within the 2x2 grid of the 8x8 block.
1053 block = ib + idy * 2 + idx;
1054 xd->mode_info_context->bmi[block].as_mode = mode;
1055 src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1056 coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
// With skip_encode set, the source buffer is passed in place of the
// reconstruction buffer.
1057 vp9_predict_intra_block(xd, block, 1,
1059 x->skip_encode ? src : dst,
1060 x->skip_encode ? src_stride : dst_stride,
1062 vp9_subtract_block(4, 4, src_diff, 8,
// Forward transform: hybrid transform for non-DCT types, otherwise the
// default 4x4 transform; both paths quantize afterwards.
1066 tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
1067 if (tx_type != DCT_DCT) {
1068 vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
1069 x->quantize_b_4x4(x, block, tx_type, 16);
1071 x->fwd_txm4x4(src_diff, coeff, 16);
1072 x->quantize_b_4x4(x, block, tx_type, 16);
1075 scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
1076 ratey += cost_coeffs(x, 0, block,
1077 tempa + idx, templ + idy, TX_4X4, scan,
1078 vp9_get_coef_neighbors_handle(scan));
1079 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
// Early out: this mode can no longer beat the best one.
1081 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
// Reconstruct so that later 4x4 units predict from decoded pixels.
1084 if (tx_type != DCT_DCT)
1085 vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
1086 dst, pd->dst.stride, tx_type);
1088 xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
1089 dst, pd->dst.stride);
1094 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1096 if (this_rd < best_rd) {
1099 *bestdistortion = distortion;
// Commit the contexts and save the reconstruction of the new best mode.
1102 vpx_memcpy(a, tempa, sizeof(tempa));
1103 vpx_memcpy(l, templ, sizeof(templ));
1104 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1105 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1106 num_4x4_blocks_wide * 4);
// No mode beat the threshold, or encoding is skipped: nothing to restore.
1112 if (best_rd >= rd_thresh || x->skip_encode)
// Restore the best mode's reconstruction into the destination buffer.
1115 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1116 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1117 num_4x4_blocks_wide * 4);
// Chooses intra modes for every sub-block of an 8x8 luma block.
//
// Walks the 2x2 grid of 4x4 positions in steps of the partition size, calls
// rd_pick_intra4x4block() for each partition with the remaining RD budget
// (best_rd - total_rd), and accumulates RD cost and distortion. The chosen
// mode is replicated into the other bmi[] entries the partition covers.
// On exit *rate_y and *distortion are written, mbmi.mode is set from bmi[3],
// and the return value is RDCOST(cost, total_distortion).
1122 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
1123 MACROBLOCK * const mb,
1126 int64_t * const distortion,
1129 MACROBLOCKD *const xd = &mb->e_mbd;
1130 const BLOCK_SIZE bsize = xd->mode_info_context->mbmi.sb_type;
1131 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1132 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1135 int64_t total_distortion = 0;
1137 int64_t total_rd = 0;
1138 ENTROPY_CONTEXT t_above[4], t_left[4];
1140 MODE_INFO *const mic = xd->mode_info_context;
// Work on local copies of the plane-0 entropy contexts.
1142 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1143 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
// Default mode costs; replaced per sub-block on key frames below.
1145 bmode_costs = mb->mbmode_cost;
1147 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1148 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1149 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1150 const int mis = xd->mode_info_stride;
1151 MB_PREDICTION_MODE best_mode = DC_PRED;
1152 int r = INT_MAX, ry = INT_MAX;
1153 int64_t d = INT64_MAX, this_rd = INT64_MAX;
// Key frames use costs indexed by the above (A) and left (L) block modes;
// L falls back to DC_PRED when there is no left neighbour and idx == 0.
1155 if (cpi->common.frame_type == KEY_FRAME) {
1156 const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
1157 const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
1158 left_block_mode(mic, i) : DC_PRED;
1160 bmode_costs = mb->y_mode_costs[A][L];
1163 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1164 t_above + idx, t_left + idy, &r, &ry, &d,
1165 bsize, best_rd - total_rd);
// Budget test for this sub-block (the guarded statement is not shown here).
1166 if (this_rd >= best_rd - total_rd)
1169 total_rd += this_rd;
1171 total_distortion += d;
// Record the mode and copy it to the rows/columns the partition spans.
1174 mic->bmi[i].as_mode = best_mode;
1175 for (j = 1; j < num_4x4_blocks_high; ++j)
1176 mic->bmi[i + j * 2].as_mode = best_mode;
1177 for (j = 1; j < num_4x4_blocks_wide; ++j)
1178 mic->bmi[i + j].as_mode = best_mode;
1180 if (total_rd >= best_rd)
1186 *rate_y = tot_rate_y;
1187 *distortion = total_distortion;
1188 xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode;
1190 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
// Searches luma intra modes DC_PRED..TM_PRED for a whole block.
//
// For each mode enabled in cpi->sf.intra_y_mode_mask, super_block_yrd() gives
// the token rate and distortion; the mode cost from bmode_costs is added and
// the lowest-RD mode is kept. *rate_tokenonly and *distortion are updated
// when a mode improves on best_rd. With USE_FULL_RD, tx_cache[] is first set
// to INT64_MAX and then lowered to the best adjusted RD per transform mode.
// Finally mbmi.mode and mbmi.tx_size are set to the selected values.
1193 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1194 int *rate, int *rate_tokenonly,
1195 int64_t *distortion, int *skippable,
1197 int64_t tx_cache[TX_MODES],
1199 MB_PREDICTION_MODE mode;
1200 MB_PREDICTION_MODE mode_selected = DC_PRED;
1201 MACROBLOCKD *const xd = &x->e_mbd;
1202 MODE_INFO *const mic = xd->mode_info_context;
1203 int this_rate, this_rate_tokenonly, s;
1204 int64_t this_distortion, this_rd;
1205 TX_SIZE best_tx = TX_4X4;
1207 int *bmode_costs = x->mbmode_cost;
1209 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1210 for (i = 0; i < TX_MODES; i++)
1211 tx_cache[i] = INT64_MAX;
1213 /* Y Search for intra prediction mode */
1214 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1215 int64_t local_tx_cache[TX_MODES];
1216 const int mis = xd->mode_info_stride;
// Speed-feature mode mask test (the guarded statement is not shown here).
1218 if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
// Key frames use costs indexed by the above (A) and left (L) block modes.
1221 if (cpi->common.frame_type == KEY_FRAME) {
1222 const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
1223 const MB_PREDICTION_MODE L = xd->left_available ?
1224 left_block_mode(mic, 0) : DC_PRED;
1226 bmode_costs = x->y_mode_costs[A][L];
1228 mic->mbmi.mode = mode;
1230 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1231 bsize, local_tx_cache, best_rd);
// INT_MAX rate is tested as a sentinel from super_block_yrd().
1233 if (this_rate_tokenonly == INT_MAX)
1236 this_rate = this_rate_tokenonly + bmode_costs[mode];
1237 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1239 if (this_rd < best_rd) {
1240 mode_selected = mode;
1242 best_tx = mic->mbmi.tx_size;
1244 *rate_tokenonly = this_rate_tokenonly;
1245 *distortion = this_distortion;
// Re-base each transform mode's cost onto this_rd and keep the per-mode
// minimum; the scan stops at the first INT64_MAX entry in local_tx_cache.
1249 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1250 for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1251 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1252 local_tx_cache[cpi->common.tx_mode];
1253 if (adj_rd < tx_cache[i]) {
1254 tx_cache[i] = adj_rd;
1260 mic->mbmi.mode = mode_selected;
1261 mic->mbmi.tx_size = best_tx;
// Accumulates rate/distortion statistics over the chroma planes
// (plane 1 .. MAX_MB_PLANE - 1) at the transform size from get_uv_tx_size().
//
// Each plane is evaluated by txfm_rd_in_plane(); its distortion is added to
// *distortion and its skip flag is AND-ed into *skippable. For inter blocks
// the chroma residual is computed first with vp9_subtract_sbuv().
// A negative ref_best_rd and a per-plane rate of INT_MAX are both tested as
// special cases; their handling is only partly visible here, but
// *distortion = INT64_MAX is assigned on one of those paths.
1266 static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
1267 int *rate, int64_t *distortion, int *skippable,
1268 int64_t *sse, BLOCK_SIZE bsize,
1269 int64_t ref_best_rd) {
1270 MACROBLOCKD *const xd = &x->e_mbd;
1271 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1272 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1274 int pnrate = 0, pnskip = 1;
1275 int64_t pndist = 0, pnsse = 0;
1277 if (ref_best_rd < 0)
1280 if (is_inter_block(mbmi))
1281 vp9_subtract_sbuv(x, bsize);
1288 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1289 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
1290 ref_best_rd, plane, bsize, uv_txfm_size);
1291 if (pnrate == INT_MAX)
1294 *distortion += pndist;
1296 *skippable &= pnskip;
1302 *distortion = INT64_MAX;
// Searches chroma intra modes DC_PRED..TM_PRED and keeps the lowest-RD one.
//
// For each mode enabled in cpi->sf.intra_uv_mode_mask, super_block_uvrd() is
// run with the current best_rd as its reference, then the mode cost from
// x->intra_uv_mode_cost[frame_type][mode] is added. *rate_tokenonly and
// *distortion are updated on improvement, and mbmi.uv_mode is left set to the
// selected mode.
1308 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1309 int *rate, int *rate_tokenonly,
1310 int64_t *distortion, int *skippable,
1312 MB_PREDICTION_MODE mode;
1313 MB_PREDICTION_MODE mode_selected = DC_PRED;
1314 int64_t best_rd = INT64_MAX, this_rd;
1315 int this_rate_tokenonly, this_rate, s;
1316 int64_t this_distortion, this_sse;
1318 // int mode_mask = (bsize <= BLOCK_8X8)
1319 // ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
1321 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1322 // if (!(mode_mask & (1 << mode)))
1323 if (!(cpi->sf.intra_uv_mode_mask & (1 << mode)))
1326 x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
1327 super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
1328 &this_distortion, &s, &this_sse, bsize, best_rd);
// INT_MAX rate is tested as a sentinel from super_block_uvrd().
1329 if (this_rate_tokenonly == INT_MAX)
1331 this_rate = this_rate_tokenonly +
1332 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
1333 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1335 if (this_rd < best_rd) {
1336 mode_selected = mode;
1339 *rate_tokenonly = this_rate_tokenonly;
1340 *distortion = this_distortion;
1345 x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
// Evaluates chroma with DC_PRED only, with no mode search.
//
// Sets mbmi.uv_mode to DC_PRED, runs super_block_uvrd() with an unbounded
// reference (INT64_MAX), then writes *rate as the token rate plus the DC_PRED
// mode cost and computes the RD cost of that rate and *distortion.
1350 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
1351 int *rate, int *rate_tokenonly,
1352 int64_t *distortion, int *skippable,
1357 x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
1358 super_block_uvrd(&cpi->common, x, rate_tokenonly,
1359 distortion, skippable, &this_sse, bsize, INT64_MAX);
1360 *rate = *rate_tokenonly +
1361 x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
1362 this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
// Chooses the chroma intra mode for a block and reports its statistics.
//
// Uses the DC_PRED-only estimate when cpi->sf.use_uv_intra_rd_estimate is
// set, otherwise the full search. Block sizes below BLOCK_8X8 are evaluated
// as BLOCK_8X8 in both cases. *mode_uv receives the uv_mode left in the mode
// info by whichever routine ran.
1367 static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
1368 int *rate_uv, int *rate_uv_tokenonly,
1369 int64_t *dist_uv, int *skip_uv,
1370 MB_PREDICTION_MODE *mode_uv) {
1371 MACROBLOCK *const x = &cpi->mb;
1373 // Use an estimated rd for uv_intra based on DC_PRED if the
1374 // appropriate speed flag is set.
1375 if (cpi->sf.use_uv_intra_rd_estimate) {
1376 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1377 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1378 // Else do a proper rd search for each possible transform size that may
1379 // be considered in the main rd loop.
1381 rd_pick_intra_sbuv_mode(cpi, x,
1382 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1383 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1385 *mode_uv = x->e_mbd.mode_info_context->mbmi.uv_mode;
// Returns the cost of signalling inter mode `mode` in context mode_context,
// read from x->inter_mode_cost[mode_context][mode - NEARESTMV].
// That lookup is only done when the SEG_LVL_SKIP feature is inactive for the
// block's segment; the value returned otherwise is not visible here.
// `mode` must satisfy is_inter_mode() (asserted).
1388 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
1390 MACROBLOCK *const x = &cpi->mb;
1391 MACROBLOCKD *const xd = &x->e_mbd;
1392 const int segment_id = xd->mode_info_context->mbmi.segment_id;
1394 // Don't account for mode here if segment skip is enabled.
1395 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1396 assert(is_inter_mode(mode));
1397 return x->inter_mode_cost[mode_context][mode - NEARESTMV];
// Stores prediction mode `mb` and motion vector *mv (into mv[0]) in the
// current block's mode info.
1403 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
1404 x->e_mbd.mode_info_context->mbmi.mode = mb;
1405 x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
// Forward declaration; the definition is outside this part of the file.
// Called from rd_check_segment_txsize() for compound-reference NEWMV.
1408 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1411 int mi_row, int mi_col,
1412 int_mv single_newmv[MAX_REF_FRAMES],
// Forward declaration; the definition is outside this part of the file.
1414 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1416 int mi_row, int mi_col,
1417 int_mv *tmp_mv, int *rate_mv);
// Assigns the motion vector(s) for inter mode this_mode to sub-block i.
//
// this_mv (and this_second_mv when ref_frame[1] > 0) are filled from
// seg_mvs[] (the case with MV bit cost) or from frame_mv[NEARESTMV],
// frame_mv[NEARMV], or zero. The case labels are not visible here; from the
// sources used they are presumably NEWMV, NEARESTMV, NEARMV and ZEROMV.
// The result is written to mic->bmi[i] and copied to every 4x4 entry the
// partition covers; the mode is stored in x->partition_info->bmi[i].
// `cost` is set from cost_mv_ref(); how it is combined with thismvcost for
// the return value is not visible here.
1419 static int labels2mode(MACROBLOCK *x, int i,
1420 MB_PREDICTION_MODE this_mode,
1421 int_mv *this_mv, int_mv *this_second_mv,
1422 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1423 int_mv seg_mvs[MAX_REF_FRAMES],
1424 int_mv *best_ref_mv,
1425 int_mv *second_best_ref_mv,
1426 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
1427 MACROBLOCKD *const xd = &x->e_mbd;
1428 MODE_INFO *const mic = xd->mode_info_context;
1429 MB_MODE_INFO *mbmi = &mic->mbmi;
1430 int cost = 0, thismvcost = 0;
1432 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1433 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1435 /* We have to be careful retrieving previously-encoded motion vectors.
1436 Ones from this macroblock have to be pulled from the BLOCKD array
1437 as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1438 MB_PREDICTION_MODE m;
1440 // the only time we should do costing for new motion vector or mode
1441 // is when we are on a new label (jbb May 08, 2007)
1442 switch (m = this_mode) {
// MVs taken from seg_mvs[]; this is the only case that adds MV bit cost.
1444 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1445 thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
1447 if (mbmi->ref_frame[1] > 0) {
1448 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1449 thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
1450 mvjcost, mvcost, 102);
// MVs taken from the NEARESTMV candidates.
1454 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1455 if (mbmi->ref_frame[1] > 0)
1456 this_second_mv->as_int =
1457 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
// MVs taken from the NEARMV candidates.
1460 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1461 if (mbmi->ref_frame[1] > 0)
1462 this_second_mv->as_int =
1463 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
// Zero MVs.
1466 this_mv->as_int = 0;
1467 if (mbmi->ref_frame[1] > 0)
1468 this_second_mv->as_int = 0;
1474 cost = cost_mv_ref(cpi, this_mode,
1475 mbmi->mode_context[mbmi->ref_frame[0]]);
1477 mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1478 if (mbmi->ref_frame[1] > 0)
1479 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1481 x->partition_info->bmi[i].mode = m;
// Replicate bmi[i] over the whole partition (the idy = idx = 0 iteration
// copies bmi[i] onto itself).
1482 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1483 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1484 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1485 &mic->bmi[i], sizeof(mic->bmi[i]));
// Builds the inter prediction for sub-block i and measures its luma RD cost.
//
// The predictor is built from one or two references using the MVs stored in
// mi->bmi[i], subtracted from the source, and each covered 4x4 block is
// forward transformed (DCT), quantized and costed.
// Outputs: *labelyrate = coefficient rate, *distortion = block error >> 2,
// *sse = accumulated sse >> 2. Returns RDCOST(*labelyrate, *distortion).
// ta / tl are entropy contexts passed to cost_coeffs().
1491 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1496 int64_t *distortion, int64_t *sse,
1497 ENTROPY_CONTEXT *ta,
1498 ENTROPY_CONTEXT *tl) {
1500 MACROBLOCKD *xd = &x->e_mbd;
1501 struct macroblockd_plane *const pd = &xd->plane[0];
1502 MODE_INFO *const mi = xd->mode_info_context;
1503 const BLOCK_SIZE bsize = mi->mbmi.sb_type;
1504 const int width = plane_block_width(bsize, pd);
1505 const int height = plane_block_height(bsize, pd);
1507 const int src_stride = x->plane[0].src.stride;
1508 uint8_t* const src = raster_block_offset_uint8(BLOCK_8X8, i,
1509 x->plane[0].src.buf,
1511 int16_t* src_diff = raster_block_offset_int16(BLOCK_8X8, i,
1512 x->plane[0].src_diff);
1513 int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, i);
1514 uint8_t* const dst = raster_block_offset_uint8(BLOCK_8X8, i,
1515 pd->dst.buf, pd->dst.stride);
1516 int64_t thisdistortion = 0, thissse = 0;
1518 int ref, second_ref = has_second_ref(&mi->mbmi);
// One pass per reference; `ref` is also passed to the predictor builder.
1520 for (ref = 0; ref < 1 + second_ref; ++ref) {
1521 const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
1522 pd->pre[ref].buf, pd->pre[ref].stride);
1523 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1524 dst, pd->dst.stride,
1525 &mi->bmi[i].as_mv[ref].as_mv,
1526 &xd->scale_factor[ref],
1527 width, height, ref, &xd->subpix, MV_PRECISION_Q3);
1530 vp9_subtract_block(height, width, src_diff, 8, src, src_stride,
1531 dst, pd->dst.stride);
1534 for (idy = 0; idy < height / 4; ++idy) {
1535 for (idx = 0; idx < width / 4; ++idx) {
1536 int64_t ssz, rd, rd1, rd2;
// NOTE(review): k is accumulated with += rather than assigned, and its
// declaration/initial value is not visible here -- confirm this gives the
// intended 4x4 block index on every iteration.
1538 k += (idy * 2 + idx);
1539 src_diff = raster_block_offset_int16(BLOCK_8X8, k,
1540 x->plane[0].src_diff);
1541 coeff = BLOCK_OFFSET(x->plane[0].coeff, k);
1542 x->fwd_txm4x4(src_diff, coeff, 16);
1543 x->quantize_b_4x4(x, k, DCT_DCT, 16);
1544 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1547 thisrate += cost_coeffs(x, 0, k,
1549 tl + (k >> 1), TX_4X4,
1550 vp9_default_scan_4x4,
1551 vp9_default_scan_4x4_neighbors);
// rd1: cost of coding the residual so far; rd2: cost with zero rate and the
// accumulated sse as distortion.
1552 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1553 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1559 *distortion = thisdistortion >> 2;
1560 *labelyrate = thisrate;
1561 *sse = thissse >> 2;
1563 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
// Fields of the per-sub-block RD statistics record (SEG_RDSTAT) and of the
// segmentation search state (BEST_SEG_INFO); the enclosing struct
// declarations are not visible here.
// ta / tl: above/left entropy contexts stored per (sub-block, mode) entry.
1574 ENTROPY_CONTEXT ta[2];
1575 ENTROPY_CONTEXT tl[2];
// Reference MVs for the first and second reference frame.
1579 int_mv *ref_mv, *second_ref_mv;
// Selected mode for each of the four 4x4 positions.
1587 MB_PREDICTION_MODE modes[4];
// RD statistics indexed by [4x4 position][inter mode offset].
1588 SEG_RDSTAT rdstat[4][INTER_MODES];
// Accumulates into r whether *mv, with both components shifted right by 3,
// falls outside [mv_row_min, mv_row_max] x [mv_col_min, mv_col_max] of x.
// r is non-zero if any of the four limits is violated.
1592 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1594 r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1595 r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1596 r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1597 r |= (mv->as_mv.col >> 3) > x->mv_col_max;
// Advances the plane-0 source and prediction buffer pointers of x to the
// position of 4x4 block i inside the 8x8 block. Undo with mi_buf_restore().
// pre[0].buf must be 8-byte aligned on entry (asserted).
1601 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1602 MB_MODE_INFO *const mbmi = &x->e_mbd.mode_info_context->mbmi;
1603 struct macroblock_plane *const p = &x->plane[0];
1604 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1606 p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
1608 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1609 pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
// NOTE(review): this tests ref_frame[1] for non-zero, while other code in
// this file tests `ref_frame[1] > 0` -- confirm both agree for the value
// used when there is no second reference.
1611 if (mbmi->ref_frame[1])
1612 pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
// Restores the plane-0 source buffer and prediction buffer(s) of x from the
// saved copies orig_src / orig_pre. pre[1] is restored only when
// ref_frame[1] is non-zero.
// NOTE(review): other code in this file tests `ref_frame[1] > 0` -- confirm
// the non-zero test here is equivalent.
1616 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1617 struct buf_2d orig_pre[2]) {
1618 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1619 x->plane[0].src = orig_src;
1620 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1621 if (mbmi->ref_frame[1])
1622 x->e_mbd.plane[0].pre[1] = orig_pre[1];
// RD search over inter modes for every sub-block of an 8x8 block.
//
// For each partition (4x4, 4x8 or 8x4) the modes NEARESTMV..NEWMV are tried.
// NEWMV may trigger a motion search (single reference) or a joint search
// (compound reference). Per-mode statistics go to bsi->rdstat[i][mode_idx],
// where bsi = bsi_buf + filter_idx. For filter_idx > 0, results already
// stored in earlier bsi_buf entries are reused when the MVs match and are
// not sub-pel. The best mode per partition is committed with labels2mode()
// and its statistics accumulated. If no mode is usable, or the running cost
// exceeds bsi->segment_rd, bsi->segment_rd is set to INT64_MAX.
// seg_mvs[i][ref] caches NEWMV search results; INVALID_MV means "not yet
// searched".
1625 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1626 BEST_SEG_INFO *bsi_buf, int filter_idx,
1627 int_mv seg_mvs[4][MAX_REF_FRAMES],
1628 int mi_row, int mi_col) {
1629 int i, j, br = 0, idx, idy;
1630 int64_t bd = 0, block_sse = 0;
1631 MB_PREDICTION_MODE this_mode;
1632 MODE_INFO *mi = x->e_mbd.mode_info_context;
1633 MB_MODE_INFO *const mbmi = &mi->mbmi;
1634 const int label_count = 4;
1635 int64_t this_segment_rd = 0;
1636 int label_mv_thresh;
1637 int segmentyrate = 0;
1638 const BLOCK_SIZE bsize = mbmi->sb_type;
1639 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1640 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1641 vp9_variance_fn_ptr_t *v_fn_ptr;
1642 ENTROPY_CONTEXT t_above[2], t_left[2];
1643 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1645 int subpelmv = 1, have_ref = 0;
// Work on local copies of the plane-0 entropy contexts.
1647 vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
1648 vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
1650 v_fn_ptr = &cpi->fn_ptr[bsize];
1652 // 64 makes this threshold really big effectively
1653 // making it so that we very rarely check mvs on
1654 // segments. setting this to 1 would make mv thresh
1655 // roughly equal to what it is for macroblocks
1656 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1658 // Segmentation method overheads
1659 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1660 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1661 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1662 // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1663 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1664 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1665 MB_PREDICTION_MODE mode_selected = ZEROMV;
1666 int64_t best_rd = INT64_MAX;
// NOTE(review): ref_frame[1] is compared with `> 0` / `<= 0` elsewhere in
// this function, so it may be non-positive for single-reference blocks; the
// second store below is unconditional -- confirm the index is always within
// [0, MAX_REF_FRAMES).
1669 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
1670 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
1671 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1672 &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
1673 &frame_mv[NEARMV][mbmi->ref_frame[0]],
1674 i, 0, mi_row, mi_col);
1675 if (mbmi->ref_frame[1] > 0)
1676 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1677 &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
1678 &frame_mv[NEARMV][mbmi->ref_frame[1]],
1679 i, 1, mi_row, mi_col);
1681 // search for the best motion vector on this segment
1682 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1683 const struct buf_2d orig_src = x->plane[0].src;
1684 struct buf_2d orig_pre[2];
1686 mode_idx = inter_mode_offset(this_mode);
// Start each mode as "not evaluated".
1687 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1689 // if we're near/nearest and mv == 0,0, compare to zeromv
1690 if ((this_mode == NEARMV || this_mode == NEARESTMV ||
1691 this_mode == ZEROMV) &&
1692 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
1693 (mbmi->ref_frame[1] <= 0 ||
1694 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
// c1/c2/c3: signalling cost of NEARMV / NEARESTMV / ZEROMV in this context.
1695 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
1696 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1697 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1698 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1700 if (this_mode == NEARMV) {
1703 } else if (this_mode == NEARESTMV) {
1707 assert(this_mode == ZEROMV);
1708 if (mbmi->ref_frame[1] <= 0) {
1710 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
1712 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
1716 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
1717 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
1719 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
1720 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
// Save the prediction buffers and seed this mode's entropy contexts.
1726 vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
1727 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1728 sizeof(bsi->rdstat[i][mode_idx].ta));
1729 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1730 sizeof(bsi->rdstat[i][mode_idx].tl));
1732 // motion search for newmv (single predictor case only)
1733 if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV &&
1734 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1737 int thissme, bestsme = INT_MAX;
1738 int sadpb = x->sadperbit4;
1742 /* Is the best so far sufficiently good that we cannot justify doing
1743 * a new motion search. */
1744 if (best_rd < label_mv_thresh)
1747 if (cpi->compressor_speed) {
1748 // use previous block's result as next block's MV predictor.
1751 x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
1754 x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
1758 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1760 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1761 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
1762 // Take wtd average of the step_params based on the last frame's
1763 // max mv magnitude and the best ref mvs of the current block for
1764 // the given reference.
1765 step_param = (vp9_init_search_range(cpi, max_mv) +
1766 cpi->mv_step_param) >> 1;
1768 step_param = cpi->mv_step_param;
1771 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
// Starting point of the search: the MV predictor shifted right by 3.
1773 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1774 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1776 // adjust src pointer for this block
// Pattern search chosen by cpi->sf.search_method; the final branch uses
// vp9_full_pixel_diamond().
1778 if (cpi->sf.search_method == HEX) {
1779 bestsme = vp9_hex_search(x, &mvp_full,
1781 sadpb, 1, v_fn_ptr, 1,
1782 bsi->ref_mv, &mode_mv[NEWMV]);
1783 } else if (cpi->sf.search_method == SQUARE) {
1784 bestsme = vp9_square_search(x, &mvp_full,
1786 sadpb, 1, v_fn_ptr, 1,
1787 bsi->ref_mv, &mode_mv[NEWMV]);
1788 } else if (cpi->sf.search_method == BIGDIA) {
1789 bestsme = vp9_bigdia_search(x, &mvp_full,
1791 sadpb, 1, v_fn_ptr, 1,
1792 bsi->ref_mv, &mode_mv[NEWMV]);
1794 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1795 sadpb, further_steps, 0, v_fn_ptr,
1796 bsi->ref_mv, &mode_mv[NEWMV]);
1799 // Should we do a full search (best quality only)
1800 if (cpi->compressor_speed == 0) {
1801 /* Check if mvp_full is within the range. */
1802 clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
1803 x->mv_row_min, x->mv_row_max);
1805 thissme = cpi->full_search_sad(x, &mvp_full,
1806 sadpb, 16, v_fn_ptr,
1807 x->nmvjointcost, x->mvcost,
1810 if (thissme < bestsme) {
1812 mode_mv[NEWMV].as_int =
1813 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
1815 /* The full search result is actually worse so re-instate the
1816 * previous best vector */
1817 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
1818 mode_mv[NEWMV].as_int;
// Sub-pel refinement, only when the search above produced a result.
1822 if (bestsme < INT_MAX) {
1825 cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
1826 bsi->ref_mv, x->errorperbit, v_fn_ptr,
1827 0, cpi->sf.subpel_iters_per_step,
1828 x->nmvjointcost, x->mvcost,
1831 // save motion search result for use in compound prediction
1832 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1835 // restore src pointers
1836 mi_buf_restore(x, orig_src, orig_pre);
// Compound NEWMV with the EIGHTTAP filter: needs both single-reference
// search results to be available in seg_mvs[].
1839 if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV &&
1840 mbmi->interp_filter == EIGHTTAP) {
1841 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1842 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1845 // adjust src pointers
1847 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1849 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1850 mi_row, mi_col, seg_mvs[i],
1852 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1853 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1854 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1855 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1857 // restore src pointers
1858 mi_buf_restore(x, orig_src, orig_pre);
// Mode + MV rate; also fills mode_mv / second_mode_mv for this mode.
1861 bsi->rdstat[i][mode_idx].brate =
1862 labels2mode(x, i, this_mode, &mode_mv[this_mode],
1863 &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1864 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
// Record the MVs for every 4x4 position this partition covers.
1867 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
1868 if (num_4x4_blocks_wide > 1)
1869 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
1870 mode_mv[this_mode].as_int;
1871 if (num_4x4_blocks_high > 1)
1872 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
1873 mode_mv[this_mode].as_int;
1874 if (mbmi->ref_frame[1] > 0) {
1875 bsi->rdstat[i][mode_idx].mvs[1].as_int =
1876 second_mode_mv[this_mode].as_int;
1877 if (num_4x4_blocks_wide > 1)
1878 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
1879 second_mode_mv[this_mode].as_int;
1880 if (num_4x4_blocks_high > 1)
1881 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
1882 second_mode_mv[this_mode].as_int;
1885 // Trap vectors that reach beyond the UMV borders
1886 if (mv_check_bounds(x, &mode_mv[this_mode]))
1888 if (mbmi->ref_frame[1] > 0 &&
1889 mv_check_bounds(x, &second_mode_mv[this_mode]))
// For later filter passes, look for an earlier bsi_buf entry with the same
// MVs; its result is reusable when no MV has any of the low four bits set.
1892 if (filter_idx > 0) {
1893 BEST_SEG_INFO *ref_bsi = bsi_buf;
1894 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) ||
1895 (mode_mv[this_mode].as_mv.col & 0x0f);
1896 have_ref = mode_mv[this_mode].as_int ==
1897 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1898 if (mbmi->ref_frame[1] > 0) {
1899 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) ||
1900 (second_mode_mv[this_mode].as_mv.col & 0x0f);
1901 have_ref &= second_mode_mv[this_mode].as_int ==
1902 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
// No match in bsi_buf[0]: try bsi_buf[1] as well.
1905 if (filter_idx > 1 && !subpelmv && !have_ref) {
1906 ref_bsi = bsi_buf + 1;
1907 have_ref = mode_mv[this_mode].as_int ==
1908 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1909 if (mbmi->ref_frame[1] > 0) {
1910 have_ref &= second_mode_mv[this_mode].as_int ==
1911 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
// Reuse the earlier statistics wholesale instead of re-encoding.
1915 if (!subpelmv && have_ref &&
1916 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1917 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1918 sizeof(SEG_RDSTAT));
1919 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1920 mode_selected = this_mode;
1921 best_rd = bsi->rdstat[i][mode_idx].brdcost;
// Encode the partition with this mode; the budget passed is what remains
// of bsi->segment_rd after the partitions already committed.
1927 bsi->rdstat[i][mode_idx].brdcost =
1928 encode_inter_mb_segment(cpi, x,
1929 bsi->segment_rd - this_segment_rd, i,
1930 &bsi->rdstat[i][mode_idx].byrate,
1931 &bsi->rdstat[i][mode_idx].bdist,
1932 &bsi->rdstat[i][mode_idx].bsse,
1933 bsi->rdstat[i][mode_idx].ta,
1934 bsi->rdstat[i][mode_idx].tl);
// Fold the mode/MV rate into the RD cost and the luma rate into brate.
1935 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1936 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1937 bsi->rdstat[i][mode_idx].brate, 0);
1938 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1939 bsi->rdstat[i][mode_idx].eobs = x->e_mbd.plane[0].eobs[i];
1942 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1943 mode_selected = this_mode;
1944 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1946 } /*for each 4x4 mode*/
// No usable mode for this partition: invalidate the remaining entries and
// the whole segmentation.
1948 if (best_rd == INT64_MAX) {
1950 for (iy = i + 1; iy < 4; ++iy)
1951 for (midx = 0; midx < INTER_MODES; ++midx)
1952 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1953 bsi->segment_rd = INT64_MAX;
// Commit the winning mode: adopt its contexts and re-apply its MVs.
1957 mode_idx = inter_mode_offset(mode_selected);
1958 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
1959 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
1961 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
1962 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
1963 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1966 br += bsi->rdstat[i][mode_idx].brate;
1967 bd += bsi->rdstat[i][mode_idx].bdist;
1968 block_sse += bsi->rdstat[i][mode_idx].bsse;
1969 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
1970 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
// Running cost already exceeds the budget: invalidate the rest.
1972 if (this_segment_rd > bsi->segment_rd) {
1974 for (iy = i + 1; iy < 4; ++iy)
1975 for (midx = 0; midx < INTER_MODES; ++midx)
1976 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1977 bsi->segment_rd = INT64_MAX;
// Replicate the partition info over the rows/columns the partition spans.
1981 for (j = 1; j < num_4x4_blocks_high; ++j)
1982 vpx_memcpy(&x->partition_info->bmi[i + j * 2],
1983 &x->partition_info->bmi[i],
1984 sizeof(x->partition_info->bmi[i]));
1985 for (j = 1; j < num_4x4_blocks_wide; ++j)
1986 vpx_memcpy(&x->partition_info->bmi[i + j],
1987 &x->partition_info->bmi[i],
1988 sizeof(x->partition_info->bmi[i]));
1990 } /* for each label */
// Publish the accumulated totals for this segmentation.
1994 bsi->segment_yrate = segmentyrate;
1995 bsi->segment_rd = this_segment_rd;
1996 bsi->sse = block_sse;
1998 // update the coding decisions
1999 for (i = 0; i < 4; ++i)
2000 bsi->modes[i] = x->partition_info->bmi[i].mode;
// Runs the sub-8x8 inter mode search and commits the result to the block.
//
// Initialises bsi (= bsi_buf + filter_idx) with best_rd as the RD budget,
// the reference MVs and the MV threshold, defaults all four modes to ZEROMV,
// then calls rd_check_segment_txsize(). If the resulting segment_rd does not
// exceed best_rd, the per-block MVs, eobs and modes are copied into the mode
// info / partition info, the rate, distortion and skippable outputs are
// filled in, and mbmi->mode is set to the mode of block 3.
// Returns bsi->segment_rd.
2003 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2004 int_mv *best_ref_mv,
2005 int_mv *second_best_ref_mv,
2009 int64_t *returndistortion,
2010 int *skippable, int64_t *psse,
2012 int_mv seg_mvs[4][MAX_REF_FRAMES],
2013 BEST_SEG_INFO *bsi_buf,
2015 int mi_row, int mi_col) {
2017 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2018 MACROBLOCKD *xd = &x->e_mbd;
2019 MODE_INFO *mi = xd->mode_info_context;
2020 MB_MODE_INFO *mbmi = &mi->mbmi;
2025 bsi->segment_rd = best_rd;
2026 bsi->ref_mv = best_ref_mv;
2027 bsi->second_ref_mv = second_best_ref_mv;
2028 bsi->mvp.as_int = best_ref_mv->as_int;
2029 bsi->mvthresh = mvthresh;
2031 for (i = 0; i < 4; i++)
2032 bsi->modes[i] = ZEROMV;
2034 rd_check_segment_txsize(cpi, x, bsi_buf, filter_idx, seg_mvs, mi_row, mi_col);
// Search result worse than the budget (the guarded statement is not shown).
2036 if (bsi->segment_rd > best_rd)
2038 /* set it to the best */
2039 for (i = 0; i < 4; i++) {
2040 mode_idx = inter_mode_offset(bsi->modes[i]);
2041 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2042 if (mbmi->ref_frame[1] > 0)
2043 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2044 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2045 x->partition_info->bmi[i].mode = bsi->modes[i];
2049 * used to set mbmi->mv.as_int
2051 *returntotrate = bsi->r;
2052 *returndistortion = bsi->d;
2053 *returnyrate = bsi->segment_yrate;
2054 *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
2056 mbmi->mode = bsi->modes[3];
2058 return bsi->segment_rd;
// Ranks the candidate reference MVs of ref_frame by SAD against the source.
//
// For each entry of mbmi->ref_mvs[ref_frame], the reference pointer is
// offset by the MV (components shifted right by 3) and the SAD is computed
// with the block-size-specific sdf function. The index of the lowest-SAD
// candidate is stored in x->mv_best_ref_index[ref_frame], and the largest
// |row| or |col| seen (shifted right by 3) in x->max_mv_context[ref_frame].
2061 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2062 uint8_t *ref_y_buffer, int ref_y_stride,
2063 int ref_frame, BLOCK_SIZE block_size ) {
2064 MACROBLOCKD *xd = &x->e_mbd;
2065 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2070 int best_sad = INT_MAX;
2071 int this_sad = INT_MAX;
2072 unsigned int max_mv = 0;
2074 uint8_t *src_y_ptr = x->plane[0].src.buf;
2076 int row_offset, col_offset;
2078 // Get the sad for each candidate reference mv
2079 for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
2080 this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
2082 max_mv = MAX(max_mv,
2083 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2084 // The list is at an end if we see 0 for a second time.
2085 if (!this_mv.as_int && zero_seen)
2087 zero_seen = zero_seen || !this_mv.as_int;
// Offset of the candidate in the reference frame.
2089 row_offset = this_mv.as_mv.row >> 3;
2090 col_offset = this_mv.as_mv.col >> 3;
2091 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2093 // Find sad for current vector.
2094 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2095 ref_y_ptr, ref_y_stride,
2098 // Note if it is the best so far.
2099 if (this_sad < best_sad) {
2100 best_sad = this_sad;
2105 // Note the index of the mv that worked best in the reference list.
2106 x->mv_best_ref_index[ref_frame] = best_index;
2107 x->max_mv_context[ref_frame] = max_mv;
// Fills the per-reference-frame signalling cost tables.
//
// ref_costs_single[] and ref_costs_comp[] are indexed by reference frame.
// When the segment feature tested by seg_ref_active is on, both tables are
// zeroed. Otherwise costs are built with vp9_cost_bit() from the
// intra/inter, compound/single and reference-selection probabilities.
// *comp_mode_p receives the compound-inter probability under
// HYBRID_PREDICTION (its value in other modes is not visible here).
// The constant 512 is stored for entries of a prediction type the frame's
// comp_pred_mode appears not to allow (the `else` lines are not shown).
2110 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2111 unsigned int *ref_costs_single,
2112 unsigned int *ref_costs_comp,
2113 vp9_prob *comp_mode_p) {
2114 VP9_COMMON *const cm = &cpi->common;
2115 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2116 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2118 if (seg_ref_active) {
2119 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2120 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2123 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
2124 vp9_prob comp_inter_p = 128;
2126 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
2127 comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
2128 *comp_mode_p = comp_inter_p;
2133 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
// Single-reference costs: inter bit (+ single/compound bit under hybrid
// prediction), then p1 separates LAST from GOLDEN/ALTREF and p2 separates
// GOLDEN from ALTREF.
2135 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
2136 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2137 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2138 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2140 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2141 base_cost += vp9_cost_bit(comp_inter_p, 0);
2143 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2144 ref_costs_single[ALTREF_FRAME] = base_cost;
2145 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2146 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2147 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2148 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2149 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2151 ref_costs_single[LAST_FRAME] = 512;
2152 ref_costs_single[GOLDEN_FRAME] = 512;
2153 ref_costs_single[ALTREF_FRAME] = 512;
// Compound-reference costs: inter bit (+ compound bit under hybrid
// prediction), then one bit on ref_comp_p selecting LAST or GOLDEN.
2155 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
2156 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2157 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2159 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2160 base_cost += vp9_cost_bit(comp_inter_p, 1);
2162 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2163 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2165 ref_costs_comp[LAST_FRAME] = 512;
2166 ref_costs_comp[GOLDEN_FRAME] = 512;
// Copies the current mode decision into ctx so that it can be restored later.
//   x                - macroblock state; x->skip and x->e_mbd are read.
//   ctx              - destination snapshot.
//   partition        - copied by value into ctx->partition_info.
//   second_ref_mv    - copied into ctx->second_best_ref_mv.
//   comp_pred_diff   - per prediction type values, narrowed to int on store.
//   tx_size_diff     - copied into ctx->tx_rd_diff.
//   best_filter_diff - SWITCHABLE_FILTERS + 1 entries copied into
//                      ctx->best_filter_diff.
2171 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2173 PARTITION_INFO *partition,
2175 int_mv *second_ref_mv,
2176 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
2177 int64_t tx_size_diff[TX_MODES],
2178 int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]) {
2179 MACROBLOCKD *const xd = &x->e_mbd;
2181 // Take a snapshot of the coding context so it can be
2182 // restored if we decide to encode this way
2183 ctx->skip = x->skip;
2184 ctx->best_mode_index = mode_index;
// Whole-struct copy of the current mode info.
2185 ctx->mic = *xd->mode_info_context;
2188 ctx->partition_info = *partition;
2190 ctx->best_ref_mv.as_int = ref_mv->as_int;
2191 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
// The int64_t differences are cast down to int when stored.
2193 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
2194 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
2195 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
2197 // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
2198 // doesn't actually work this way
// NOTE(review): sizeof(ctx->tx_rd_diff) covers the whole array only if
// tx_rd_diff is declared as an array member rather than a pointer; the
// declaration is not visible here - confirm. The size is taken from the
// destination, while the copy below is sized from the source element type.
2199 memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2200 memcpy(ctx->best_filter_diff, best_filter_diff,
2201 sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
// Points the per-plane buffers in dst[] at the planes of src and then runs
// setup_pred_plane() on each of them.
//   xd             - supplies the per-plane subsampling_x / subsampling_y.
//   dst            - output array of plane buffer descriptors.
//   src            - frame buffer providing the y/u/v pointers and strides.
//   mi_row, mi_col - block position, forwarded to setup_pred_plane().
//   scale          - scale factors used for plane 0.
//   scale_uv       - scale factors used for every other plane.
2204 static void setup_pred_block(const MACROBLOCKD *xd,
2205 struct buf_2d dst[MAX_MB_PLANE],
2206 const YV12_BUFFER_CONFIG *src,
2207 int mi_row, int mi_col,
2208 const struct scale_factors *scale,
2209 const struct scale_factors *scale_uv) {
2212 dst[0].buf = src->y_buffer;
2213 dst[0].stride = src->y_stride;
2214 dst[1].buf = src->u_buffer;
2215 dst[2].buf = src->v_buffer;
// Both chroma planes share the same stride.
2216 dst[1].stride = dst[2].stride = src->uv_stride;
// NOTE(review): writing dst[3] requires MAX_MB_PLANE > 3; presumably this is
// compiled only when an alpha plane is enabled - confirm.
2218 dst[3].buf = src->alpha_buffer;
2219 dst[3].stride = src->alpha_stride;
2222 // TODO(jkoleszar): Make scale factors per-plane data
2223 for (i = 0; i < MAX_MB_PLANE; i++) {
// Plane 0 uses 'scale'; all other planes use 'scale_uv'.
2224 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2225 i ? scale_uv : scale,
2226 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
// Prepares everything needed to evaluate inter modes against one reference
// frame: scale factors, prediction plane pointers and candidate motion vectors.
//   idx              - index into cm->ref_frame_map selecting the frame buffer.
//   frame_type       - reference frame being set up; indexes scale[], yv12_mb[],
//                      mbmi->ref_mvs[], frame_nearest_mv[] and frame_near_mv[].
//   block_size       - forwarded to mv_pred().
//   mi_row, mi_col   - block position.
//   frame_nearest_mv - output, entry [frame_type] written by
//                      vp9_find_best_ref_mvs().
//   frame_near_mv    - output, entry [frame_type] written by
//                      vp9_find_best_ref_mvs().
//   yv12_mb          - output plane descriptors, entry [frame_type] written.
//   scale            - output scale factors, entry [frame_type] written.
2230 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2231 int idx, MV_REFERENCE_FRAME frame_type,
2232 BLOCK_SIZE block_size,
2233 int mi_row, int mi_col,
2234 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2235 int_mv frame_near_mv[MAX_REF_FRAMES],
2236 struct buf_2d yv12_mb[4][MAX_MB_PLANE],
2237 struct scale_factors scale[MAX_REF_FRAMES]) {
2238 VP9_COMMON *cm = &cpi->common;
2239 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
2240 MACROBLOCKD *const xd = &x->e_mbd;
2241 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2243 // set up scaling factors
// active_ref_scale is indexed by frame_type - 1, so frame_type must be >= 1.
2244 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
// The offsets keep only the low four bits of the scaled block position.
2246 scale[frame_type].x_offset_q4 =
2247 ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
2248 REF_SCALE_SHIFT) & 0xf;
2249 scale[frame_type].y_offset_q4 =
2250 ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
2251 REF_SCALE_SHIFT) & 0xf;
2253 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2254 // use the UV scaling factors.
// The same scale factors are passed for both the luma and chroma arguments.
2255 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
2256 &scale[frame_type], &scale[frame_type]);
2258 // Gets an initial list of candidate vectors from neighbours and orders them
2259 vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
2260 xd->prev_mode_info_context,
2262 mbmi->ref_mvs[frame_type], mi_row, mi_col);
2264 // Candidate refinement carried out at encoder and decoder
2265 vp9_find_best_ref_mvs(xd,
2266 mbmi->ref_mvs[frame_type],
2267 &frame_nearest_mv[frame_type],
2268 &frame_near_mv[frame_type]);
2270 // Further refinement that is encode side only to test the top few candidates
2271 // in full and choose the best as the centre point for subsequent searches.
2272 // The current implementation doesn't support scaling.
2273 if (!vp9_is_scaled(&scale[frame_type]))
2274 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
2275 frame_type, block_size);
// Returns the scaled copy of the given reference frame, or NULL when
// cpi->scaled_ref_idx[fb] equals cm->ref_frame_map[fb] for that reference
// (fb being the index returned by get_ref_frame_idx()).
2278 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
2279 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
2280 int fb = get_ref_frame_idx(cpi, ref_frame);
// A differing index means a separate buffer is recorded for this reference.
2281 if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb])
2282 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]];
2283 return scaled_ref_frame;
// Returns the cost of the block's current interpolation filter: the entry of
// x->switchable_interp_costs selected by the prediction context and by
// mbmi->interp_filter, multiplied by SWITCHABLE_INTERP_RATE_FACTOR.
2286 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2287 const MACROBLOCKD *const xd = &x->e_mbd;
2288 const MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2289 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2290 return SWITCHABLE_INTERP_RATE_FACTOR *
2291 x->switchable_interp_costs[ctx][mbmi->interp_filter];
// Motion search for a block predicted from a single reference frame
// (mbmi->ref_frame[0]).
//   cpi            - encoder instance; speed features and search function
//                    pointers are read from it.
//   x              - macroblock state; its MV limits are modified during the
//                    search and restored from local copies afterwards.
//   mi_row, mi_col - block position, used when swapping in a scaled reference.
//   tmp_mv         - motion vector passed to the sub-pixel refinement step and
//                    to the rate computation.
//   rate_mv        - output; receives the value of vp9_mv_bit_cost().
2294 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2296 int mi_row, int mi_col,
2297 int_mv *tmp_mv, int *rate_mv) {
2298 MACROBLOCKD *xd = &x->e_mbd;
2299 VP9_COMMON *cm = &cpi->common;
2300 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2301 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2302 int bestsme = INT_MAX;
2303 int further_steps, step_param;
2304 int sadpb = x->sadperbit16;
2306 int ref = mbmi->ref_frame[0];
// The first candidate in the reference MV list is used as the reference MV.
2307 int_mv ref_mv = mbmi->ref_mvs[ref][0];
2308 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
// Remember the current MV limits; they are written back after the search.
2310 int tmp_col_min = x->mv_col_min;
2311 int tmp_col_max = x->mv_col_max;
2312 int tmp_row_min = x->mv_row_min;
2313 int tmp_row_max = x->mv_row_max;
2315 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
2317 if (scaled_ref_frame) {
2319 // Swap out the reference frame for a version that's been scaled to
2320 // match the resolution of the current frame, allowing the existing
2321 // motion search code to be used without additional modifications.
2322 for (i = 0; i < MAX_MB_PLANE; i++)
2323 backup_yv12[i] = xd->plane[i].pre[0];
2325 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2328 vp9_clamp_mv_min_max(x, &ref_mv.as_mv);
2330 // Adjust search parameters based on small partitions' result.
2332 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
2333 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
2334 // adjust search range
2339 // Get prediction MV.
2340 mvp_full.as_int = x->pred_mv.as_int;
2342 // Adjust MV sign if needed.
2343 if (cm->ref_frame_sign_bias[ref]) {
2344 mvp_full.as_mv.col *= -1;
2345 mvp_full.as_mv.row *= -1;
2348 // Work out the size of the first step in the mv step search.
2349 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2350 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2351 // Take wtd average of the step_params based on the last frame's
2352 // max mv magnitude and that based on the best ref mvs of the current
2353 // block for the given reference.
2354 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2355 cpi->mv_step_param) >> 1;
2357 step_param = cpi->mv_step_param;
2359 // mvp_full.as_int = ref_mv[0].as_int;
2361 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
// NOTE(review): presumably converts the start point from 1/8-pel to full-pel
// units before the full-pixel search - confirm.
2364 mvp_full.as_mv.col >>= 3;
2365 mvp_full.as_mv.row >>= 3;
2367 // Further step/diamond searches as necessary
2368 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
// Dispatch on the configured search method; the final call
// (vp9_full_pixel_diamond) handles the remaining methods.
2370 if (cpi->sf.search_method == HEX) {
2371 bestsme = vp9_hex_search(x, &mvp_full,
2374 &cpi->fn_ptr[block_size], 1,
2376 } else if (cpi->sf.search_method == SQUARE) {
2377 bestsme = vp9_square_search(x, &mvp_full,
2380 &cpi->fn_ptr[block_size], 1,
2382 } else if (cpi->sf.search_method == BIGDIA) {
2383 bestsme = vp9_bigdia_search(x, &mvp_full,
2386 &cpi->fn_ptr[block_size], 1,
2389 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2390 sadpb, further_steps, 1,
2391 &cpi->fn_ptr[block_size],
// Put back the MV limits saved at the top of the function.
2395 x->mv_col_min = tmp_col_min;
2396 x->mv_col_max = tmp_col_max;
2397 x->mv_row_min = tmp_row_min;
2398 x->mv_row_max = tmp_row_max;
// Sub-pixel refinement runs only when the full-pixel search returned a
// value below INT_MAX.
2400 if (bestsme < INT_MAX) {
2401 int dis; /* TODO: use dis in distortion calculation later. */
2403 cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
2405 &cpi->fn_ptr[block_size],
2406 0, cpi->sf.subpel_iters_per_step,
2407 x->nmvjointcost, x->mvcost,
2410 *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
2411 x->nmvjointcost, x->mvcost,
// Undo the scaled-reference swap performed above.
2413 if (scaled_ref_frame) {
2415 for (i = 0; i < MAX_MB_PLANE; i++)
2416 xd->plane[i].pre[0] = backup_yv12[i];
// Iterative motion search for compound prediction: repeatedly builds a
// predictor from one reference and refines the motion vector of a reference
// against it, for at most four iterations.
//   cpi            - encoder instance.
//   x              - macroblock state; its MV limits are saved and restored
//                    inside each iteration.
//   mi_row, mi_col - block position.
//   single_newmv   - per-reference MVs used to initialise frame_mv[].
//   rate_mv        - output; sum of vp9_mv_bit_cost() for both references.
// frame_mv[refs[0]] and frame_mv[refs[1]] are updated in place.
2420 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2423 int mi_row, int mi_col,
2424 int_mv single_newmv[MAX_REF_FRAMES],
// Luma block dimensions in pixels.
2426 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
2427 MACROBLOCKD *xd = &x->e_mbd;
2428 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
// A negative second reference is mapped to index 0 so it can be used as an
// array index.
2429 int refs[2] = { mbmi->ref_frame[0],
2430 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2432 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2434 // Prediction buffer from second frame.
// NOTE(review): no NULL check on the vpx_memalign() result is visible in
// this function - confirm how allocation failure is handled.
2435 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2437 // Do joint motion search in compound mode to get more accurate mv.
2438 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2439 struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
2440 struct buf_2d scaled_first_yv12;
2441 int last_besterr[2] = {INT_MAX, INT_MAX};
2442 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
2443 scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
2444 scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]);
2446 ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
2447 ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
2449 if (scaled_ref_frame[0]) {
2451 // Swap out the reference frame for a version that's been scaled to
2452 // match the resolution of the current frame, allowing the existing
2453 // motion search code to be used without additional modifications.
2454 for (i = 0; i < MAX_MB_PLANE; i++)
2455 backup_yv12[i] = xd->plane[i].pre[0];
2456 setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL);
2459 if (scaled_ref_frame[1]) {
2461 for (i = 0; i < MAX_MB_PLANE; i++)
2462 backup_second_yv12[i] = xd->plane[i].pre[1];
// NOTE(review): pre[1] was just backed up, yet index 0 is passed here, the
// same index used for scaled_ref_frame[0] above. Confirm whether index 1 was
// intended.
2464 setup_pre_planes(xd, 0, scaled_ref_frame[1], mi_row, mi_col, NULL);
2467 xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
2469 xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
// Remember the (possibly scaled) first-reference luma plane; pre[0] is
// overwritten inside the loop and reset from this copy.
2471 scaled_first_yv12 = xd->plane[0].pre[0];
2473 // Initialize mv using single prediction mode result.
2474 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2475 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2477 // Allow joint search multiple times iteratively for each ref frame
2478 // and break out the search loop if it couldn't find better mv.
2479 for (ite = 0; ite < 4; ite++) {
2480 struct buf_2d ref_yv12[2];
2481 int bestsme = INT_MAX;
2482 int sadpb = x->sadperbit16;
2484 int search_range = 3;
// Save the MV limits; they are restored after the full-pixel refinement.
2486 int tmp_col_min = x->mv_col_min;
2487 int tmp_col_max = x->mv_col_max;
2488 int tmp_row_min = x->mv_row_min;
2489 int tmp_row_max = x->mv_row_max;
2492 // Initialized here because of compiler problem in Visual Studio.
2493 ref_yv12[0] = xd->plane[0].pre[0];
2494 ref_yv12[1] = xd->plane[0].pre[1];
2496 // Get pred block from second frame.
// 'id' selects the reference being refined; '!id' is the other one, whose
// prediction is built here.
2497 vp9_build_inter_predictor(ref_yv12[!id].buf,
2498 ref_yv12[!id].stride,
2500 &frame_mv[refs[!id]].as_mv,
2501 &xd->scale_factor[!id],
2503 &xd->subpix, MV_PRECISION_Q3);
2505 // Compound motion search on first ref frame.
2507 xd->plane[0].pre[0] = ref_yv12[id];
2508 vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv);
2510 // Use mv result from single mode as mvp.
2511 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2513 tmp_mv.as_mv.col >>= 3;
2514 tmp_mv.as_mv.row >>= 3;
2516 // Small-range full-pixel motion search
2517 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2519 &cpi->fn_ptr[block_size],
2520 x->nmvjointcost, x->mvcost,
2521 &ref_mv[id], second_pred,
2524 x->mv_col_min = tmp_col_min;
2525 x->mv_col_max = tmp_col_max;
2526 x->mv_row_min = tmp_row_min;
2527 x->mv_row_max = tmp_row_max;
2529 if (bestsme < INT_MAX) {
2530 int dis; /* TODO: use dis in distortion calculation later. */
2533 bestsme = cpi->find_fractional_mv_step_comp(
2537 &cpi->fn_ptr[block_size],
2538 0, cpi->sf.subpel_iters_per_step,
2539 x->nmvjointcost, x->mvcost,
2540 &dis, &sse, second_pred,
// Reset pre[0] to the first-reference plane saved before the loop.
2545 xd->plane[0].pre[0] = scaled_first_yv12;
// Accept the new MV only if it improves on the best error seen so far for
// this reference.
2547 if (bestsme < last_besterr[id]) {
2548 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2549 last_besterr[id] = bestsme;
2555 // restore the predictor
2556 if (scaled_ref_frame[0]) {
2558 for (i = 0; i < MAX_MB_PLANE; i++)
2559 xd->plane[i].pre[0] = backup_yv12[i];
2562 if (scaled_ref_frame[1]) {
2564 for (i = 0; i < MAX_MB_PLANE; i++)
2565 xd->plane[i].pre[1] = backup_second_yv12[i];
// Total MV rate is the sum of the costs of both vectors, each measured
// against the first candidate of its reference MV list.
2567 *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2568 &mbmi->ref_mvs[refs[0]][0],
2569 x->nmvjointcost, x->mvcost, 96);
2570 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2571 &mbmi->ref_mvs[refs[1]][0],
2572 x->nmvjointcost, x->mvcost, 96);
2574 vpx_free(second_pred);
// Evaluates one inter prediction mode for the current block: obtains the
// motion vector(s), selects an interpolation filter, builds the predictor and
// accumulates rate and distortion.
//   cpi, x            - encoder instance and macroblock state.
//   txfm_cache        - forwarded to super_block_yrd().
//   rate2, distortion - running totals for the mode; added to here.
//   rate_y, distortion_y, rate_uv, distortion_uv
//                     - per-component results filled by super_block_yrd() /
//                       super_block_uvrd(), or by the breakout path.
//   mode_excluded     - set from comp_pred_mode when not already set.
//   best_filter       - output; the interpolation filter chosen by the search.
//   mode_mv           - per-mode, per-reference motion vectors; the row for the
//                       current mode is used as frame_mv.
//   mi_row, mi_col    - block position.
//   single_newmv      - cache of single-reference NEWMV results; read for
//                       compound prediction, written after a single search.
//   ref_best_rd       - best RD cost so far, used for early termination.
// Returns this_rd; per the comment at the return, 0 means the caller
// recalculates it.
2577 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2579 int64_t txfm_cache[],
2580 int *rate2, int64_t *distortion,
2582 int *rate_y, int64_t *distortion_y,
2583 int *rate_uv, int64_t *distortion_uv,
2584 int *mode_excluded, int *disable_skip,
2585 INTERPOLATIONFILTERTYPE *best_filter,
2586 int_mv (*mode_mv)[MAX_REF_FRAMES],
2587 int mi_row, int mi_col,
2588 int_mv single_newmv[MAX_REF_FRAMES],
2590 const int64_t ref_best_rd) {
2591 VP9_COMMON *cm = &cpi->common;
2592 MACROBLOCKD *xd = &x->e_mbd;
2593 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
// Compound prediction is in use when a second reference frame (> 0) is set.
2594 const int is_comp_pred = (mbmi->ref_frame[1] > 0);
2595 const int num_refs = is_comp_pred ? 2 : 1;
2596 const int this_mode = mbmi->mode;
2597 int_mv *frame_mv = mode_mv[this_mode];
// A negative second reference is mapped to index 0 so it can be used as an
// array index.
2599 int refs[2] = { mbmi->ref_frame[0],
2600 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2602 int64_t this_rd = 0;
// Scratch prediction buffer: one 64x64 region per plane, used with stride 64.
2603 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2604 int pred_exists = 0;
2606 int64_t rd, best_rd = INT64_MAX;
2607 int best_needs_copy = 0;
2608 uint8_t *orig_dst[MAX_MB_PLANE];
2609 int orig_dst_stride[MAX_MB_PLANE];
2612 if (this_mode == NEWMV) {
2615 // Initialize mv using single prediction mode result.
2616 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2617 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
// Joint search is run only for block sizes at or above the configured
// threshold; the MV rate is computed directly from the two vectors below.
2619 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2620 joint_motion_search(cpi, x, bsize, frame_mv,
2621 mi_row, mi_col, single_newmv, &rate_mv);
2623 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2624 &mbmi->ref_mvs[refs[0]][0],
2625 x->nmvjointcost, x->mvcost, 96);
2626 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2627 &mbmi->ref_mvs[refs[1]][0],
2628 x->nmvjointcost, x->mvcost, 96);
2630 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2631 frame_mv[refs[1]].as_int == INVALID_MV)
2636 single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
// Store the search result in the mode's MV table, in the block mode info and
// in the single_newmv cache.
2638 frame_mv[refs[0]].as_int =
2639 xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2640 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2644 // if we're near/nearest and mv == 0,0, compare to zeromv
2645 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2646 frame_mv[refs[0]].as_int == 0 &&
2647 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2648 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
2649 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
// Signalling costs of the three modes in this mode context.
2650 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2651 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2652 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2654 if (this_mode == NEARMV) {
2657 } else if (this_mode == NEARESTMV) {
2661 assert(this_mode == ZEROMV);
2662 if (num_refs == 1) {
2664 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
2666 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
2670 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
2671 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
2673 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2674 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
2680 for (i = 0; i < num_refs; ++i) {
2681 cur_mv[i] = frame_mv[refs[i]];
2682 // Clip "next_nearest" so that it does not extend too far out of image
2683 if (this_mode != NEWMV)
2684 clamp_mv2(&cur_mv[i].as_mv, xd);
2686 if (mv_check_bounds(x, &cur_mv[i]))
2688 mbmi->mv[i].as_int = cur_mv[i].as_int;
2691 // do first prediction into the destination buffer. Do the next
2692 // prediction into a temporary buffer. Then keep track of which one
2693 // of these currently holds the best predictor, and use the other
2694 // one for future predictions. In the end, copy from tmp_buf to
2695 // dst if necessary.
2696 for (i = 0; i < MAX_MB_PLANE; i++) {
2697 orig_dst[i] = xd->plane[i].dst.buf;
2698 orig_dst_stride[i] = xd->plane[i].dst.stride;
2701 /* We don't include the cost of the second reference here, because there
2702 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2703 * words if you present them in that order, the second one is always known
2704 * if the first is known */
2705 *rate2 += cost_mv_ref(cpi, this_mode,
2706 mbmi->mode_context[mbmi->ref_frame[0]]);
2708 if (!(*mode_excluded)) {
2710 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2712 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2717 // Are all MVs integer pel for Y and UV
// The low four bits of each MV component must be zero.
2718 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2719 (mbmi->mv[0].as_mv.col & 15) == 0;
2721 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2722 (mbmi->mv[1].as_mv.col & 15) == 0;
2723 // Search for best switchable filter by checking the variance of
2724 // pred error irrespective of whether the filter will be used
2725 if (cm->mcomp_filter_type != BILINEAR) {
2726 *best_filter = EIGHTTAP;
2727 if (x->source_variance <
2728 cpi->sf.disable_filter_search_var_thresh) {
// NOTE(review): *best_filter already holds EIGHTTAP from the assignment just
// above, so this second assignment does not change it.
2729 *best_filter = EIGHTTAP;
2730 vp9_zero(cpi->rd_filter_cache);
2733 int tmp_rate_sum = 0;
2734 int64_t tmp_dist_sum = 0;
2736 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
// Try each switchable filter in turn and record its modelled RD cost.
2737 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2740 mbmi->interp_filter = i;
2741 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2742 rs = get_switchable_rate(x);
2743 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
// With integer-pel MVs, filters after the first reuse the rate/distortion
// sums saved from filter 0 instead of rebuilding the predictor.
2745 if (i > 0 && intpel_mv) {
2746 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2747 tmp_rate_sum, tmp_dist_sum);
2748 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2749 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2750 cpi->rd_filter_cache[i] + rs_rd);
2751 rd = cpi->rd_filter_cache[i];
2752 if (cm->mcomp_filter_type == SWITCHABLE)
2756 int64_t dist_sum = 0;
// Choose where this prediction is written: the original destination buffers
// or the tmp_buf scratch area.
2757 if ((cm->mcomp_filter_type == SWITCHABLE &&
2758 (!i || best_needs_copy)) ||
2759 (cm->mcomp_filter_type != SWITCHABLE &&
2760 (cm->mcomp_filter_type == mbmi->interp_filter ||
2761 (i == 0 && intpel_mv)))) {
2762 for (j = 0; j < MAX_MB_PLANE; j++) {
2763 xd->plane[j].dst.buf = orig_dst[j];
2764 xd->plane[j].dst.stride = orig_dst_stride[j];
2767 for (j = 0; j < MAX_MB_PLANE; j++) {
2768 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2769 xd->plane[j].dst.stride = 64;
2772 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2773 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2774 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2775 rate_sum, dist_sum);
2776 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2777 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2778 cpi->rd_filter_cache[i] + rs_rd);
2779 rd = cpi->rd_filter_cache[i];
2780 if (cm->mcomp_filter_type == SWITCHABLE)
// Save filter 0's sums for reuse by later filters when MVs are integer-pel.
2782 if (i == 0 && intpel_mv) {
2783 tmp_rate_sum = rate_sum;
2784 tmp_dist_sum = dist_sum;
// Early-out test after the first filter: half the modelled RD already exceeds
// the best RD so far. The destination buffers are restored first.
2787 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2788 if (rd / 2 > ref_best_rd) {
2789 for (i = 0; i < MAX_MB_PLANE; i++) {
2790 xd->plane[i].dst.buf = orig_dst[i];
2791 xd->plane[i].dst.stride = orig_dst_stride[i];
2796 newbest = i == 0 || rd < best_rd;
2800 *best_filter = mbmi->interp_filter;
// Toggle which buffer (destination or tmp_buf) holds the best predictor.
2801 if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
2802 best_needs_copy = !best_needs_copy;
2805 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2806 (cm->mcomp_filter_type != SWITCHABLE &&
2807 cm->mcomp_filter_type == mbmi->interp_filter)) {
2812 for (i = 0; i < MAX_MB_PLANE; i++) {
2813 xd->plane[i].dst.buf = orig_dst[i];
2814 xd->plane[i].dst.stride = orig_dst_stride[i];
2818 // Set the appropriate filter
2819 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2820 cm->mcomp_filter_type : *best_filter;
2821 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
// Filter signalling cost applies only when the frame uses switchable filters.
2822 rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
2825 if (best_needs_copy) {
2826 // again temporarily set the buffers to local memory to prevent a memcpy
2827 for (i = 0; i < MAX_MB_PLANE; i++) {
2828 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2829 xd->plane[i].dst.stride = 64;
2833 // Handles the special case when a filter that is not in the
2834 // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2835 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2839 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2842 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2843 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2844 // if current pred_error modeled rd is substantially more than the best
2845 // so far, do not bother doing full rd
2846 if (rd / 2 > ref_best_rd) {
2847 for (i = 0; i < MAX_MB_PLANE; i++) {
2848 xd->plane[i].dst.buf = orig_dst[i];
2849 xd->plane[i].dst.stride = orig_dst_stride[i];
2855 if (cpi->common.mcomp_filter_type == SWITCHABLE)
2856 *rate2 += get_switchable_rate(x);
// The skip/breakout checks below apply to single-reference prediction only.
2858 if (!is_comp_pred) {
2859 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2861 else if (x->encode_breakout) {
2862 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2863 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2864 unsigned int var, sse;
2865 // Skipping threshold for ac.
2866 unsigned int thresh_ac;
2867 // The encode_breakout input
2868 unsigned int encode_breakout = x->encode_breakout << 4;
2870 // Calculate threshold according to dequant value.
2871 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2873 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
2874 if (thresh_ac > 36000)
2877 // Use encode_breakout input if it is bigger than internal threshold.
2878 if (thresh_ac < encode_breakout)
2879 thresh_ac = encode_breakout;
2881 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2882 xd->plane[0].dst.buf,
2883 xd->plane[0].dst.stride, &sse);
2885 // Adjust threshold according to partition size.
2886 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2887 b_height_log2_lookup[bsize]);
2889 // Y skipping condition checking
2890 if (sse < thresh_ac || sse == 0) {
2891 // Skipping threshold for dc
2892 unsigned int thresh_dc;
2894 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2896 // dc skipping checking
2897 if ((sse - var) < thresh_dc || sse == var) {
2898 unsigned int sse_u, sse_v;
2899 unsigned int var_u, var_v;
2901 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2902 x->plane[1].src.stride,
2903 xd->plane[1].dst.buf,
2904 xd->plane[1].dst.stride, &sse_u);
2906 // U skipping condition checking
2907 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2908 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2909 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2910 x->plane[2].src.stride,
2911 xd->plane[2].dst.buf,
2912 xd->plane[2].dst.stride, &sse_v);
2914 // V skipping condition checking
2915 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2916 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2922 // Scaling factor for SSE from spatial domain to frequency domain
2923 // is 16. Adjust distortion accordingly.
2924 *distortion_uv = (sse_u + sse_v) << 4;
2925 *distortion = (sse << 4) + *distortion_uv;
2928 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2937 int skippable_y, skippable_uv;
2938 int64_t sseuv = INT64_MAX;
2939 int64_t rdcosty = INT64_MAX;
2941 // Y cost and distortion
2942 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2943 bsize, txfm_cache, ref_best_rd);
// A rate of INT_MAX from super_block_yrd() is treated as failure: the
// distortion is set to INT64_MAX and the destination buffers are restored.
2945 if (*rate_y == INT_MAX) {
2947 *distortion = INT64_MAX;
2948 for (i = 0; i < MAX_MB_PLANE; i++) {
2949 xd->plane[i].dst.buf = orig_dst[i];
2950 xd->plane[i].dst.stride = orig_dst_stride[i];
2956 *distortion += *distortion_y;
// The luma RD cost, capped by the cost of zero rate at distortion *psse,
// reduces the budget handed to the chroma search.
2958 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2959 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2961 super_block_uvrd(cm, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2962 bsize, ref_best_rd - rdcosty);
// Same failure handling as for luma above.
2963 if (*rate_uv == INT_MAX) {
2965 *distortion = INT64_MAX;
2966 for (i = 0; i < MAX_MB_PLANE; i++) {
2967 xd->plane[i].dst.buf = orig_dst[i];
2968 xd->plane[i].dst.stride = orig_dst_stride[i];
2975 *distortion += *distortion_uv;
2976 *skippable = skippable_y && skippable_uv;
// Always leave the destination buffers pointing at their original locations.
2979 for (i = 0; i < MAX_MB_PLANE; i++) {
2980 xd->plane[i].dst.buf = orig_dst[i];
2981 xd->plane[i].dst.stride = orig_dst_stride[i];
2984 return this_rd; // if 0, this will be re-calculated by caller
// Chooses intra prediction modes for the current block and reports the
// resulting rate and distortion.
//   cpi, x      - encoder instance and macroblock state.
//   returnrate  - output rate; set to INT_MAX when the luma search result is
//                 not better than best_rd.
//   returndist  - output distortion (luma + chroma).
//   ctx         - receives the tx_rd_diff values and a copy of the mode info.
//   best_rd     - RD bound handed to the luma mode searches.
2987 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2988 int *returnrate, int64_t *returndist,
2990 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2991 VP9_COMMON *const cm = &cpi->common;
2992 MACROBLOCKD *const xd = &x->e_mbd;
2993 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2994 int y_skip = 0, uv_skip;
2995 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
2998 xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
// Blocks of 8x8 and larger use the whole-block luma search; the other call
// below is the sub-8x8 search, with chroma evaluated at BLOCK_8X8.
2999 if (bsize >= BLOCK_8X8) {
3000 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3001 &dist_y, &y_skip, bsize, tx_cache,
3002 best_rd) >= best_rd) {
3003 *returnrate = INT_MAX;
3006 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3007 &dist_uv, &uv_skip, bsize);
3010 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3011 &dist_y, best_rd) >= best_rd) {
3012 *returnrate = INT_MAX;
3015 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3016 &dist_uv, &uv_skip, BLOCK_8X8);
// When both luma and chroma are skippable, the token-only rates are removed
// and the skip flag is costed as 1; otherwise it is costed as 0.
3019 if (y_skip && uv_skip) {
3020 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3021 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
3022 *returndist = dist_y + dist_uv;
3023 vp9_zero(ctx->tx_rd_diff);
3026 *returnrate = rate_y + rate_uv +
3027 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
3028 *returndist = dist_y + dist_uv;
// Record each transform mode's RD cost relative to the frame's tx_mode.
3029 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3030 for (i = 0; i < TX_MODES; i++)
3031 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
// Save the chosen mode info in the context.
3034 ctx->mic = *xd->mode_info_context;
3037 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3038 int mi_row, int mi_col,
3040 int64_t *returndistortion,
3042 PICK_MODE_CONTEXT *ctx,
3043 int64_t best_rd_so_far) {
3044 VP9_COMMON *cm = &cpi->common;
3045 MACROBLOCKD *xd = &x->e_mbd;
3046 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
3047 const struct segmentation *seg = &cm->seg;
3048 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3049 MB_PREDICTION_MODE this_mode;
3050 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3051 unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
3053 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3054 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3055 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3056 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3058 int idx_list[4] = {0,
3062 int64_t best_rd = best_rd_so_far;
3063 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3064 int64_t best_tx_rd[TX_MODES];
3065 int64_t best_tx_diff[TX_MODES];
3066 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3067 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3068 int64_t best_filter_rd[SWITCHABLE_FILTERS + 1];
3069 int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
3070 MB_MODE_INFO best_mbmode = { 0 };
3072 int mode_index, best_mode_index = 0;
3073 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3074 vp9_prob comp_mode_p;
3075 int64_t best_intra_rd = INT64_MAX;
3076 int64_t best_inter_rd = INT64_MAX;
3077 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3078 // MB_PREDICTION_MODE best_inter_mode = ZEROMV;
3079 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3080 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
3081 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3082 int64_t dist_uv[TX_SIZES];
3083 int skip_uv[TX_SIZES];
3084 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3085 struct scale_factors scale_factor[4];
3086 unsigned int ref_frame_mask = 0;
3087 unsigned int mode_mask = 0;
3088 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3089 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
3090 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
3091 cpi->common.y_dc_delta_q);
3092 int_mv seg_mvs[4][MAX_REF_FRAMES];
3093 union b_mode_info best_bmodes[4];
3094 PARTITION_INFO best_partition;
3095 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3096 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3099 x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
3101 for (i = 0; i < 4; i++) {
3103 for (j = 0; j < MAX_REF_FRAMES; j++)
3104 seg_mvs[i][j].as_int = INVALID_MV;
3106 // Everywhere the flag is set the error is much higher than its neighbors.
3107 ctx->frames_with_high_error = 0;
3108 ctx->modes_with_high_error = 0;
3110 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3113 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3114 best_pred_rd[i] = INT64_MAX;
3115 for (i = 0; i < TX_MODES; i++)
3116 best_tx_rd[i] = INT64_MAX;
3117 for (i = 0; i <= SWITCHABLE_FILTERS; i++)
3118 best_filter_rd[i] = INT64_MAX;
3119 for (i = 0; i < TX_SIZES; i++)
3120 rate_uv_intra[i] = INT_MAX;
3122 *returnrate = INT_MAX;
3124 // Create a mask set to 1 for each reference frame used by a smaller
3126 if (cpi->sf.use_avoid_tested_higherror) {
3127 switch (block_size) {
3129 for (i = 0; i < 4; i++) {
3130 for (j = 0; j < 4; j++) {
3131 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
3132 mode_mask |= x->mb_context[i][j].modes_with_high_error;
3135 for (i = 0; i < 4; i++) {
3136 ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
3137 mode_mask |= x->sb32_context[i].modes_with_high_error;
3141 for (i = 0; i < 4; i++) {
3143 x->mb_context[xd->sb_index][i].frames_with_high_error;
3144 mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
3148 // Until we handle all block sizes set it to present;
3153 ref_frame_mask = ~ref_frame_mask;
3154 mode_mask = ~mode_mask;
3157 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3158 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3159 setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
3160 mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
3161 yv12_mb, scale_factor);
3163 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3164 frame_mv[ZEROMV][ref_frame].as_int = 0;
3167 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3168 int mode_excluded = 0;
3169 int64_t this_rd = INT64_MAX;
3170 int disable_skip = 0;
3171 int compmode_cost = 0;
3172 int rate2 = 0, rate_y = 0, rate_uv = 0;
3173 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3175 int64_t tx_cache[TX_MODES];
3178 int64_t total_sse = INT_MAX;
3181 for (i = 0; i < TX_MODES; ++i)
3182 tx_cache[i] = INT64_MAX;
3185 this_mode = vp9_mode_order[mode_index].mode;
3186 ref_frame = vp9_mode_order[mode_index].ref_frame;
3187 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
3189 // Skip modes that have been masked off but always consider first mode.
3190 if (mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) &&
3191 (cpi->unused_mode_skip_mask & (1 << mode_index)) )
3194 // Skip if the current reference frame has been masked off
3195 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3196 (cpi->ref_frame_mask & (1 << ref_frame)))
3199 // Test best rd so far against threshold for trying this mode.
3200 if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
3201 cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) ||
3202 cpi->rd_threshes[bsize][mode_index] == INT_MAX)
3205 // Do not allow compound prediction if the segment level reference
3206 // frame feature is in use as in this case there can only be one reference.
3207 if ((second_ref_frame > INTRA_FRAME) &&
3208 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3211 // Skip some checking based on small partitions' result.
3212 if (x->fast_ms > 1 && !ref_frame)
3214 if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
3217 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) {
3218 if (!(ref_frame_mask & (1 << ref_frame))) {
3221 if (!(mode_mask & (1 << this_mode))) {
3224 if (second_ref_frame != NONE
3225 && !(ref_frame_mask & (1 << second_ref_frame))) {
3230 mbmi->ref_frame[0] = ref_frame;
3231 mbmi->ref_frame[1] = second_ref_frame;
3233 if (!(ref_frame == INTRA_FRAME
3234 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3237 if (!(second_ref_frame == NONE
3238 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3242 comp_pred = second_ref_frame > INTRA_FRAME;
3244 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3245 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3247 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3248 if (ref_frame != best_inter_ref_frame &&
3249 second_ref_frame != best_inter_ref_frame)
3252 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3254 if (ref_frame > 0 &&
3255 vp9_is_scaled(&scale_factor[ref_frame]) &&
3256 this_mode == SPLITMV)
3259 if (second_ref_frame > 0 &&
3260 vp9_is_scaled(&scale_factor[second_ref_frame]) &&
3261 this_mode == SPLITMV)
3264 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3265 mbmi->mode = this_mode;
3266 mbmi->uv_mode = DC_PRED;
3268 // Evaluate all sub-pel filters irrespective of whether we can use
3269 // them for this frame.
3270 mbmi->interp_filter = cm->mcomp_filter_type;
3271 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3273 if (bsize >= BLOCK_8X8 &&
3274 (this_mode == I4X4_PRED || this_mode == SPLITMV))
3276 if (bsize < BLOCK_8X8 &&
3277 !(this_mode == I4X4_PRED || this_mode == SPLITMV))
3281 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3283 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3285 mode_excluded = mode_excluded
3287 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
3289 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3292 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
3296 // Select prediction reference frames.
3297 for (i = 0; i < MAX_MB_PLANE; i++) {
3298 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3300 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3303 // If the segment reference frame feature is enabled....
3304 // then do nothing if the current ref frame is not allowed..
3305 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3306 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3309 // If the segment skip feature is enabled....
3310 // then do nothing if the current mode is not allowed..
3311 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3312 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3314 // Disable this drop out case if the ref frame
3315 // segment level feature is enabled for this segment. This is to
3316 // prevent the possibility that we end up unable to pick any mode.
3317 } else if (!vp9_segfeature_active(seg, segment_id,
3318 SEG_LVL_REF_FRAME)) {
3319 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3320 // unless ARNR filtering is enabled in which case we want
3321 // an unfiltered alternative. We allow near/nearest as well
3322 // because they may result in zero-zero MVs but be cheaper.
3323 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3324 if ((this_mode != ZEROMV &&
3325 !(this_mode == NEARMV &&
3326 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3327 !(this_mode == NEARESTMV &&
3328 frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
3329 ref_frame != ALTREF_FRAME) {
3334 // TODO(JBB): This is to make up for the fact that we don't have sad
3335 // functions that work when the block size reads outside the umv. We
3336 // should fix this either by making the motion search just work on
3337 // a representative block in the boundary ( first ) and then implement a
3338 // function that does sads when inside the border..
3339 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
3340 this_mode == NEWMV) {
3344 #ifdef MODE_TEST_HIT_STATS
3346 // Keep a record of the number of test hits at each size
3347 cpi->mode_test_hits[bsize]++;
3350 if (this_mode == I4X4_PRED) {
3354 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3355 (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME))
3359 // I4X4_PRED is only considered for block sizes less than 8x8.
3360 mbmi->tx_size = TX_4X4;
3361 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3362 &distortion_y, best_rd) >= best_rd)
3365 rate2 += intra_cost_penalty;
3366 distortion2 += distortion_y;
3368 if (rate_uv_intra[TX_4X4] == INT_MAX) {
3369 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
3370 &rate_uv_tokenonly[TX_4X4],
3371 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
3374 rate2 += rate_uv_intra[TX_4X4];
3375 rate_uv = rate_uv_tokenonly[TX_4X4];
3376 distortion2 += dist_uv[TX_4X4];
3377 distortion_uv = dist_uv[TX_4X4];
3378 mbmi->uv_mode = mode_uv[TX_4X4];
3379 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3380 for (i = 0; i < TX_MODES; ++i)
3381 tx_cache[i] = tx_cache[ONLY_4X4];
3382 } else if (ref_frame == INTRA_FRAME) {
3384 // Disable intra modes other than DC_PRED for blocks with low variance
3385 // Threshold for intra skipping based on source variance
3386 // TODO(debargha): Specialize the threshold for super block sizes
3387 static const int skip_intra_var_thresh[BLOCK_SIZES] = {
3388 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
3390 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3391 this_mode != DC_PRED &&
3392 x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
3394 // Only search the oblique modes if the best so far is
3395 // one of the neighboring directional modes
3396 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3397 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3398 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
3401 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3402 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3405 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3406 bsize, tx_cache, best_rd);
3408 if (rate_y == INT_MAX)
3411 uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
3412 if (rate_uv_intra[uv_tx] == INT_MAX) {
3413 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
3414 &rate_uv_tokenonly[uv_tx],
3415 &dist_uv[uv_tx], &skip_uv[uv_tx],
3419 rate_uv = rate_uv_tokenonly[uv_tx];
3420 distortion_uv = dist_uv[uv_tx];
3421 skippable = skippable && skip_uv[uv_tx];
3422 mbmi->uv_mode = mode_uv[uv_tx];
3424 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3425 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
3426 rate2 += intra_cost_penalty;
3427 distortion2 = distortion_y + distortion_uv;
3428 } else if (this_mode == SPLITMV) {
3429 const int is_comp_pred = second_ref_frame > 0;
3432 int64_t this_rd_thresh;
3433 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3434 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3435 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3436 int tmp_best_skippable = 0;
3437 int switchable_filter_index;
3438 int_mv *second_ref = is_comp_pred ?
3439 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3440 union b_mode_info tmp_best_bmodes[16];
3441 MB_MODE_INFO tmp_best_mbmode;
3442 PARTITION_INFO tmp_best_partition;
3443 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3444 int pred_exists = 0;
3447 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3448 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3450 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3451 if (ref_frame != best_inter_ref_frame &&
3452 second_ref_frame != best_inter_ref_frame)
3456 this_rd_thresh = (ref_frame == LAST_FRAME) ?
3457 cpi->rd_threshes[bsize][THR_NEWMV] :
3458 cpi->rd_threshes[bsize][THR_NEWA];
3459 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3460 cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
3461 xd->mode_info_context->mbmi.tx_size = TX_4X4;
3463 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
3464 if (cm->mcomp_filter_type != BILINEAR) {
3465 tmp_best_filter = EIGHTTAP;
3466 if (x->source_variance <
3467 cpi->sf.disable_filter_search_var_thresh) {
3468 tmp_best_filter = EIGHTTAP;
3469 vp9_zero(cpi->rd_filter_cache);
3471 for (switchable_filter_index = 0;
3472 switchable_filter_index < SWITCHABLE_FILTERS;
3473 ++switchable_filter_index) {
3476 mbmi->interp_filter = switchable_filter_index;
3477 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3479 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
3480 &mbmi->ref_mvs[ref_frame][0],
3483 &rate, &rate_y, &distortion,
3484 &skippable, &total_sse,
3485 (int)this_rd_thresh, seg_mvs,
3486 bsi, switchable_filter_index,
3489 if (tmp_rd == INT64_MAX)
3491 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
3492 rs = get_switchable_rate(x);
3493 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3494 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
3495 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
3497 if (cm->mcomp_filter_type == SWITCHABLE)
3500 newbest = (tmp_rd < tmp_best_rd);
3502 tmp_best_filter = mbmi->interp_filter;
3503 tmp_best_rd = tmp_rd;
3505 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
3506 (mbmi->interp_filter == cm->mcomp_filter_type &&
3507 cm->mcomp_filter_type != SWITCHABLE)) {
3508 tmp_best_rdu = tmp_rd;
3509 tmp_best_rate = rate;
3510 tmp_best_ratey = rate_y;
3511 tmp_best_distortion = distortion;
3512 tmp_best_sse = total_sse;
3513 tmp_best_skippable = skippable;
3514 tmp_best_mbmode = *mbmi;
3515 tmp_best_partition = *x->partition_info;
3516 for (i = 0; i < 4; i++)
3517 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
3519 if (switchable_filter_index == 0 &&
3520 cpi->sf.use_rd_breakout &&
3521 best_rd < INT64_MAX) {
3522 if (tmp_best_rdu / 2 > best_rd) {
3523 // skip searching the other filters if the first is
3524 // already substantially larger than the best so far
3525 tmp_best_filter = mbmi->interp_filter;
3526 tmp_best_rdu = INT64_MAX;
3531 } // switchable_filter_index loop
3535 if (tmp_best_rdu == INT64_MAX)
3538 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
3539 tmp_best_filter : cm->mcomp_filter_type);
3540 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3542 // Handles the special case when a filter that is not in the
3543 // switchable list (bilinear, 6-tap) is indicated at the frame level
3544 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
3545 &mbmi->ref_mvs[ref_frame][0],
3548 &rate, &rate_y, &distortion,
3549 &skippable, &total_sse,
3550 (int)this_rd_thresh, seg_mvs,
3553 if (tmp_rd == INT64_MAX)
3556 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
3557 int rs = get_switchable_rate(x);
3558 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
3560 tmp_rd = tmp_best_rdu;
3561 total_sse = tmp_best_sse;
3562 rate = tmp_best_rate;
3563 rate_y = tmp_best_ratey;
3564 distortion = tmp_best_distortion;
3565 skippable = tmp_best_skippable;
3566 *mbmi = tmp_best_mbmode;
3567 *x->partition_info = tmp_best_partition;
3568 for (i = 0; i < 4; i++)
3569 xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
3573 distortion2 += distortion;
3575 if (cpi->common.mcomp_filter_type == SWITCHABLE)
3576 rate2 += get_switchable_rate(x);
3578 if (!mode_excluded) {
3580 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
3582 mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
3584 compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
3586 tmp_best_rdu = best_rd -
3587 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
3588 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
3590 if (tmp_best_rdu > 0) {
3591 // If even the 'Y' rd value of split is higher than best so far
3592 // then don't bother looking at UV
3593 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
3595 super_block_uvrd(cm, x, &rate_uv, &distortion_uv, &uv_skippable,
3596 &uv_sse, BLOCK_8X8, tmp_best_rdu);
3597 if (rate_uv == INT_MAX)
3600 distortion2 += distortion_uv;
3601 skippable = skippable && uv_skippable;
3602 total_sse += uv_sse;
3604 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3605 for (i = 0; i < TX_MODES; ++i)
3606 tx_cache[i] = tx_cache[ONLY_4X4];
3609 compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
3610 this_rd = handle_inter_mode(cpi, x, bsize,
3612 &rate2, &distortion2, &skippable,
3613 &rate_y, &distortion_y,
3614 &rate_uv, &distortion_uv,
3615 &mode_excluded, &disable_skip,
3616 &tmp_best_filter, frame_mv,
3618 single_newmv, &total_sse, best_rd);
3619 if (this_rd == INT64_MAX)
3623 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3624 rate2 += compmode_cost;
3627 // Estimate the reference frame signaling cost and add it
3628 // to the rolling cost variable.
3629 if (second_ref_frame > INTRA_FRAME) {
3630 rate2 += ref_costs_comp[ref_frame];
3632 rate2 += ref_costs_single[ref_frame];
3635 if (!disable_skip) {
3636 // Test for the condition where skip block will be activated
3637 // because there are no non zero coefficients and make any
3638 // necessary adjustment for rate. Ignore if skip is coded at
3639 // segment level as the cost won't have been added in.
3640 // Is Mb level skip allowed (i.e. not coded at segment level).
3641 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3644 if (skippable && bsize >= BLOCK_8X8) {
3645 // Back out the coefficient coding costs
3646 rate2 -= (rate_y + rate_uv);
3647 // for best yrd calculation
3650 if (mb_skip_allowed) {
3653 // Cost the skip mb case
3654 vp9_prob skip_prob =
3655 vp9_get_pred_prob_mbskip(cm, xd);
3658 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3659 rate2 += prob_skip_cost;
3662 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3663 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3664 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3665 // Add in the cost of the no skip flag.
3666 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3668 rate2 += prob_skip_cost;
3670 // FIXME(rbultje) make this work for splitmv also
3671 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3673 rate2 += prob_skip_cost;
3674 distortion2 = total_sse;
3675 assert(total_sse >= 0);
3676 rate2 -= (rate_y + rate_uv);
3681 } else if (mb_skip_allowed) {
3682 // Add in the cost of the no skip flag.
3683 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3685 rate2 += prob_skip_cost;
3688 // Calculate the final RD estimate for this mode.
3689 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3692 // Keep record of best intra rd
3693 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME &&
3694 is_intra_mode(xd->mode_info_context->mbmi.mode) &&
3695 this_rd < best_intra_rd) {
3696 best_intra_rd = this_rd;
3697 best_intra_mode = xd->mode_info_context->mbmi.mode;
3699 // Keep record of best inter rd with single reference
3700 if (xd->mode_info_context->mbmi.ref_frame[0] > INTRA_FRAME &&
3701 xd->mode_info_context->mbmi.ref_frame[1] == NONE &&
3703 this_rd < best_inter_rd) {
3704 best_inter_rd = this_rd;
3705 best_inter_ref_frame = ref_frame;
3706 // best_inter_mode = xd->mode_info_context->mbmi.mode;
3709 if (!disable_skip && ref_frame == INTRA_FRAME) {
3710 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3711 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3712 for (i = 0; i <= SWITCHABLE_FILTERS; i++)
3713 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3716 if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
3717 // Store the respective mode distortions for later use.
3718 if (mode_distortions[this_mode] == -1
3719 || distortion2 < mode_distortions[this_mode]) {
3720 mode_distortions[this_mode] = distortion2;
3722 if (frame_distortions[ref_frame] == -1
3723 || distortion2 < frame_distortions[ref_frame]) {
3724 frame_distortions[ref_frame] = distortion2;
3728 // Did this mode help.. i.e. is it the new best mode
3729 if (this_rd < best_rd || x->skip) {
3730 if (!mode_excluded) {
3731 // Note index of best mode so far
3732 best_mode_index = mode_index;
3734 if (ref_frame == INTRA_FRAME) {
3735 /* required for left and above block mv */
3736 mbmi->mv[0].as_int = 0;
3739 *returnrate = rate2;
3740 *returndistortion = distortion2;
3742 best_yrd = best_rd -
3743 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
3744 best_mbmode = *mbmi;
3745 best_skip2 = this_skip2;
3746 best_partition = *x->partition_info;
3748 if (this_mode == I4X4_PRED || this_mode == SPLITMV)
3749 for (i = 0; i < 4; i++)
3750 best_bmodes[i] = xd->mode_info_context->bmi[i];
3752 // TODO(debargha): enhance this test with a better distortion prediction
3753 // based on qp, activity mask and history
3754 if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) {
3755 const int qstep = xd->plane[0].dequant[1];
3756 // TODO(debargha): Enhance this by specializing for each mode_index
3758 if (x->source_variance < UINT_MAX) {
3759 const int var_adjust = (x->source_variance < 16);
3760 scale -= var_adjust;
3762 if (ref_frame > INTRA_FRAME &&
3763 distortion2 * scale < qstep * qstep) {
3770 /* keep record of best compound/single-only prediction */
3771 if (!disable_skip && ref_frame != INTRA_FRAME) {
3772 int single_rd, hybrid_rd, single_rate, hybrid_rate;
3774 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3775 single_rate = rate2 - compmode_cost;
3776 hybrid_rate = rate2;
3778 single_rate = rate2;
3779 hybrid_rate = rate2 + compmode_cost;
3782 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3783 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3785 if (second_ref_frame <= INTRA_FRAME &&
3786 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
3787 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
3788 } else if (second_ref_frame > INTRA_FRAME &&
3789 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
3790 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
3792 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
3793 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
3796 /* keep record of best filter type */
3797 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3798 cm->mcomp_filter_type != BILINEAR) {
3799 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
3800 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
3801 for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
3803 // In cases of poor prediction, filter_cache[] can contain really big
3804 // values, which actually are bigger than this_rd itself. This can
3805 // cause negative best_filter_rd[] values, which is obviously silly.
3806 // Therefore, if filter_cache < ref, we do an adjusted calculation.
3807 if (cpi->rd_filter_cache[i] >= ref)
3808 adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
3809 else // FIXME(rbultje) do this for comppred also
3810 adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
3811 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3815 /* keep record of best txfm size */
3816 if (bsize < BLOCK_32X32) {
3817 if (bsize < BLOCK_16X16) {
3818 if (this_mode == SPLITMV || this_mode == I4X4_PRED)
3819 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
3820 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3822 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3824 if (!mode_excluded && this_rd != INT64_MAX) {
3825 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3826 int64_t adj_rd = INT64_MAX;
3827 if (this_mode != I4X4_PRED) {
3828 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3833 if (adj_rd < best_tx_rd[i])
3834 best_tx_rd[i] = adj_rd;
3841 if (x->skip && !comp_pred)
3845 if (best_rd >= best_rd_so_far)
3848 // If we used an estimate for the uv intra rd in the loop above...
3849 if (cpi->sf.use_uv_intra_rd_estimate) {
3850 // Do Intra UV best rd mode selection if best mode choice above was intra.
3851 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
3852 TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
3853 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
3854 &rate_uv_tokenonly[uv_tx_size],
3855 &dist_uv[uv_tx_size],
3856 &skip_uv[uv_tx_size],
3857 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
3861 // If indicated then mark the index of the chosen mode to be inspected at
3862 // other block sizes.
3863 if (bsize <= cpi->sf.unused_mode_skip_lvl) {
3864 cpi->unused_mode_skip_mask = cpi->unused_mode_skip_mask &
3865 (~((int64_t)1 << best_mode_index));
3868 // If we are using reference masking and the set mask flag is set then
3869 // create the reference frame mask.
3870 if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
3871 cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);
3873 // Flag all modes that have a distortion that's > 2x the best we found at
3875 for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3876 if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3879 if (mode_distortions[mode_index] > 2 * *returndistortion) {
3880 ctx->modes_with_high_error |= (1 << mode_index);
3884 // Flag all ref frames that have a distortion that's > 2x the best we found at
3886 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3887 if (frame_distortions[ref_frame] > 2 * *returndistortion) {
3888 ctx->frames_with_high_error |= (1 << ref_frame);
3892 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
3893 *returnrate = INT_MAX;
3894 *returndistortion = INT_MAX;
3898 assert((cm->mcomp_filter_type == SWITCHABLE) ||
3899 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
3900 (best_mbmode.ref_frame[0] == INTRA_FRAME));
3902 // Updating rd_thresh_freq_fact[] here means that the different
3903 // partition/block sizes are handled independently based on the best
3904 // choice for the current partition. It may well be better to keep a scaled
3905 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3906 // combination that wins out.
3907 if (cpi->sf.adaptive_rd_thresh) {
3908 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3909 if (mode_index == best_mode_index) {
3910 cpi->rd_thresh_freq_fact[bsize][mode_index] -=
3911 (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
3913 cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
3914 if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
3915 (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT)) {
3916 cpi->rd_thresh_freq_fact[bsize][mode_index] =
3917 cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT;
3924 *mbmi = best_mbmode;
3925 x->skip |= best_skip2;
3926 if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
3927 best_mbmode.sb_type < BLOCK_8X8) {
3928 for (i = 0; i < 4; i++)
3929 xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
3932 if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
3933 best_mbmode.sb_type < BLOCK_8X8) {
3934 for (i = 0; i < 4; i++)
3935 xd->mode_info_context->bmi[i].as_mv[0].as_int =
3936 best_bmodes[i].as_mv[0].as_int;
3938 if (mbmi->ref_frame[1] > 0)
3939 for (i = 0; i < 4; i++)
3940 xd->mode_info_context->bmi[i].as_mv[1].as_int =
3941 best_bmodes[i].as_mv[1].as_int;
3943 *x->partition_info = best_partition;
3945 mbmi->mv[0].as_int = xd->mode_info_context->bmi[3].as_mv[0].as_int;
3946 mbmi->mv[1].as_int = xd->mode_info_context->bmi[3].as_mv[1].as_int;
3949 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
3950 if (best_pred_rd[i] == INT64_MAX)
3951 best_pred_diff[i] = INT_MIN;
3953 best_pred_diff[i] = best_rd - best_pred_rd[i];
3957 for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
3958 if (best_filter_rd[i] == INT64_MAX)
3959 best_filter_diff[i] = 0;
3961 best_filter_diff[i] = best_rd - best_filter_rd[i];
3963 if (cm->mcomp_filter_type == SWITCHABLE)
3964 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3966 vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
3970 for (i = 0; i < TX_MODES; i++) {
3971 if (best_tx_rd[i] == INT64_MAX)
3972 best_tx_diff[i] = 0;
3974 best_tx_diff[i] = best_rd - best_tx_rd[i];
3977 vpx_memset(best_tx_diff, 0, sizeof(best_tx_diff));
3980 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
3982 store_coding_context(x, ctx, best_mode_index,
3984 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3985 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3986 mbmi->ref_frame[1]][0],
3987 best_pred_diff, best_tx_diff, best_filter_diff);