/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>

#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"

#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

#define I4X4_PRED 0x8000
#define SPLITMV 0x10000

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
  {NEWMV,     LAST_FRAME,   NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    LAST_FRAME,   NONE},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {I4X4_PRED, INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block sizes.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

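// For example (editorial illustration): BLOCK_8X8 carries the factor 4,
// i.e. x1.0 after the divide-by-4 applied in vp9_initialize_rd_consts()
// below, while BLOCK_4X4 (factor 2) halves the threshold and BLOCK_64X64
// (factor 32) scales it by x8.
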
#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  int i, j, k, l, t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
          }
}

static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
        (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}

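// Illustration (editorial, not from the original source): at an effective
// quantizer value of 32.0 the formulas above give
// sad_per_bit16 = (int)(0.0418 * 32.0 + 2.4107) = 3 and
// sad_per_bit4 = (int)(0.063 * 32.0 + 2.742) = 4, so the per-bit SAD
// weighting grows slowly and roughly linearly with the quantizer.
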
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}

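// Worked example (editorial): if vp9_dc_quant() returns q = 8 then
// RDMULT = (11 * 8 * 8) >> 2 = 176. The multiplier grows with the square
// of the dc quantizer step, in the spirit of the
// 3*dc_qlookup[Q]*dc_qlookup[Q] note above (11/4 = 2.75 rather than 3).
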
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i, bsize;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      for (i = 0; i < MAX_MODES; ++i) {
        // Thresholds here seem unnecessarily harsh but fine given actual
        // range of values used for cpi->sf.thresh_mult[].
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        // *4 relates to the scaling of rd_thresh_block_size_factor[]
        if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / (4 * 100);
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];

        if (cpi->sf.adaptive_rd_thresh)
          cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
        else
          cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
      }
    }
  } else {
    cpi->RDDIV = 100;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      for (i = 0; i < MAX_MODES; i++) {
        // Thresholds here seem unnecessarily harsh but fine given actual
        // range of values used for cpi->sf.thresh_mult[].
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];

        if (cpi->sf.adaptive_rd_thresh)
          cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
        else
          cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
      }
    }
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
            cost_token(vp9_inter_mode_tree,
                       cpi->common.fc.inter_mode_probs[i],
                       vp9_inter_mode_encodings - NEARESTMV + m);
    }
  }
}

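// Illustrative arithmetic (editorial): with cpi->RDMULT > 1000, a mode with
// thresh_mult 2000 at q = 40 on BLOCK_8X8 (size factor 4) gets
// rd_threshes = 2000 * 40 * 4 / (4 * 100) = 800. The else branch omits the
// /100 because there RDDIV is 100 rather than 1, so rd costs carry the
// extra factor of 100 instead.
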
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  double y = x * inv_step;
  int d = (int) y;
  if (d >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size.
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));

  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

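// Illustrative cross-check of the closed forms above (an editorial sketch,
// compiled out; not part of the encoder): these helpers recompute Rn(x) and
// Dn(x) directly, so table entries can be verified by hand. For instance,
// with inv_tab_step == 8, laplace_rate_norm(1.0) ~= 2.015 matches
// rate_tab[8] == 2.014 and laplace_dist_norm(1.0) ~= 0.079 matches
// dist_tab[8] == 0.079. Both helpers assume x > 0.
#if 0
static double binary_entropy(double p) {  // H(p) in bits
  if (p <= 0.0 || p >= 1.0)
    return 0.0;
  return -p * log2(p) - (1.0 - p) * log2(1.0 - p);
}

static double laplace_rate_norm(double x) {  // Rn(x)
  const double r = exp(-sqrt(2.0) * x);
  return binary_entropy(sqrt(r)) +
         sqrt(r) * (1.0 + binary_entropy(r) / (1.0 - r));
}

static double laplace_dist_norm(double x) {  // Dn(x)
  return 1.0 - (x / sqrt(2.0)) / sinh(x / sqrt(2.0));
}
#endif
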
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;
    double x = qstep / sqrt(s2);
    model_rd_norm(x, &R, &D);
    *rate = ((n << 8) * R + 0.5);
    *dist = (var * D + 0.5);
  }
  vp9_clear_system_state();
}

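// A note on units (editorial, inferred from the code above): *rate is the
// normalized rate scaled by n << 8, i.e. 1/256th-bit units for a block of
// n pixels, while *dist is the normalized distortion Dn scaled back up by
// the source variance.
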
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  int t = 4;
  BLOCK_SIZE_TYPE bs = BLOCK_4X4;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  int rate_sum = 0;
  int64_t dist_sum = 0;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
    t = 4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
    t = 8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
    t = 16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
    t = 32;
  } else {
    assert(0);
  }
  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      (void) cpi->fn_ptr[bs].vf(p->src.buf + j * p->src.stride + k,
                                p->src.stride,
                                pd->dst.buf + j * pd->dst.stride + k,
                                pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                               &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
  *out_rate_sum = rate_sum;
  *out_dist_sum = (dist_sum << 4);
}

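// Editorial note on the skip heuristic above: model_rd_from_var_lapndz()
// returns rate in 1/256th-bit units, so (rate < 1024) treats a transform
// block whose modeled cost is under 4 bits as effectively skippable; one
// expensive block clears *out_skip for the whole plane.
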
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned) coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 16 - 13, 0 },
  { 1, 2, 3, 4, 11, 64 - 21, 0 },
  { 1, 2, 3, 4, 11, 256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};

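// Sanity check (editorial): each row sums to the coefficient count of its
// transform size, e.g. 1 + 2 + 3 + 4 + 3 + (16 - 13) = 16 for TX_4X4 and
// 1 + 2 + 3 + 4 + 11 + (1024 - 21) = 1024 for TX_32X32.
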
static INLINE int cost_coeffs(MACROBLOCK *mb,
                              int plane, int block, PLANE_TYPE type,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt, c, cost;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block);
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS]
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
  ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
  uint8_t token_cache[1024];

  // Check for consistency of tx_size with mode info
  assert((!type && !plane) || (type && plane));
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    assert(tx_size == get_uv_tx_size(mbmi));
  }

  pt = combine_entropy_contexts(above_ec, left_ec);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
  } else {
    int v, prev_t, band_left = *band_count++;

    // dc token
    v = qcoeff_ptr[0];
    prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      pt = get_coef_context(nb, token_cache, c);
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
      token_cache[rc] = vp9_pt_energy_class[t];
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      pt = get_coef_context(nb, token_cache, c);
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
    }
  }

  // is eob first coefficient;
  *A = *L = (c > 0);

  return cost;
}

struct rdcost_block_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  TX_SIZE tx_size;
  int bw;
  int bh;
  int rate;
  int64_t dist;
  int64_t sse;
  int64_t best_rd;
  int skip;
  const int16_t *scan, *nb;
};

static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
  args->sse += this_sse >> shift;

  if (x->skip_encode &&
      xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> shift;
    args->dist += p;
    args->sse += p;
  }
}

static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args* args = arg;
  MACROBLOCKD *const xd = &args->x->e_mbd;
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size,
                           &x_idx, &y_idx);

  args->rate += cost_coeffs(args->x, plane, block,
                            xd->plane[plane].plane_type, args->t_above + x_idx,
                            args->t_left + y_idx, args->tx_size,
                            args->scan, args->nb);
}

static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {args->cm, x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
  rd = MIN(rd1, rd2);
  if (rd > args->best_rd) {
    args->skip = 1;
    args->rate = INT_MAX;
    args->dist = INT64_MAX;
    args->sse = INT64_MAX;
    return;
  }

  if (!is_inter_block(&xd->mode_info_context->mbmi))
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args);
  else
    xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args);

  dist_block(plane, block, bsize, ss_txfrm_size, args);
  rate_block(plane, block, bsize, ss_txfrm_size, args);
}

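// Editorial note: rd1 above is the cost of coding the residual accumulated
// so far, rd2 the cost of skipping it outright (distortion = sse). Once
// even the cheaper of the two exceeds best_rd, the per-block walk bails
// out early by poisoning the accumulators with INT_MAX/INT64_MAX.
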
static void txfm_rd_in_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
  int i;
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size,
                                    num_4x4_blocks_wide, num_4x4_blocks_high,
                                    0, 0, 0, ref_best_rd, 0 };

  xd->mode_info_context->mbmi.txfm_size = tx_size;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(&args.t_above, pd->above_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
      vpx_memcpy(&args.t_left, pd->left_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
      get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0),
                      &args.scan, &args.nb);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_blocks_wide; i += 2)
        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 2)
        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
      get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd),
                      &args.scan, &args.nb);
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_blocks_wide; i += 4)
        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 4)
        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
      get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd),
                        &args.scan, &args.nb);
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_blocks_wide; i += 8)
        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 8)
        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }

  foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate = args.rate;
  *sse = args.sse;
  *skippable = vp9_is_skippable_in_plane(xd, bsize, plane) && (!args.skip);
}

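// Editorial note on the context setup above: for transforms larger than
// 4x4 the per-4x4 entropy contexts covered by one transform block are
// collapsed into a single flag, e.g. !!*(uint16_t *) tests two adjacent
// 4x4 contexts at once for TX_8X8, stored at the stride of the transform.
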
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  if (max_txfm_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->tx_mode != ONLY_4X4) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }
  txfm_rd_in_plane(cm, x, rate, distortion, skip,
                   &sse[mbmi->txfm_size], ref_best_rd, 0, bs,
                   mbmi->txfm_size);
  cpi->txfm_stepdown_count[0]++;
}

static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;

  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] == INT_MAX)
      continue;
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->txfm_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                               rd[TX_4X4][1] : rd[TX_8X8][1];

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

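// Editorial note: rd[n][0] excludes and rd[n][1] includes the cost of
// signalling tx_size n (the tx_probs bits added above), which is why
// tx_cache[] uses the [0] column for the fixed ALLOW_* modes and the [1]
// column when scoring TX_MODE_SELECT.
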
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};

  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);

  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    rd[n][0] = (scale_rd[n] * rd[n][0]);
    rd[n][1] = (scale_rd[n] * rd[n][1]);
  }

  if (max_txfm_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] <= rd[TX_16X16][1] &&
        rd[TX_32X32][1] <= rd[TX_8X8][1] &&
        rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] <= rd[TX_8X8][1] &&
               rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] <= rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(cm, x, rate, distortion, skip, &sse[mbmi->txfm_size],
                   ref_best_rd, 0, bs, mbmi->txfm_size);

  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] <= rd[TX_16X16][1] &&
      rd[TX_32X32][1] <= rd[TX_8X8][1] &&
      rd[TX_32X32][1] <= rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] <= rd[TX_8X8][1] &&
             rd[TX_16X16][1] <= rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
  }
}

static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int64_t *distortion,
                            int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

  assert(bs == mbmi->sb_type);
  if (mbmi->ref_frame[0] > INTRA_FRAME)
    vp9_subtract_sby(x, bs);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      (cpi->sf.tx_size_search_method != USE_FULL_RD &&
       mbmi->ref_frame[0] == INTRA_FRAME)) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->txfm_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
      mbmi->ref_frame[0] > INTRA_FRAME) {
    if (bs >= BLOCK_32X32)
      model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
                           &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
    if (bs >= BLOCK_16X16)
      model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
                           &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);

    model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
                         &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);

    model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
                         &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);

    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    if (bs >= BLOCK_32X32)
      txfm_rd_in_plane(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                       &sse[TX_32X32], ref_best_rd, 0, bs, TX_32X32);
    if (bs >= BLOCK_16X16)
      txfm_rd_in_plane(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                       &sse[TX_16X16], ref_best_rd, 0, bs, TX_16X16);
    txfm_rd_in_plane(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
                     &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
    txfm_rd_in_plane(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
                     &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->txfm_size];
}

static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D27_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

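// Editorial example: each oblique mode is bracketed by two directional
// neighbours, e.g. D117_PRED sits between V_PRED and D135_PRED, so it is
// only searched when one of those neighbours is the best mode so far and
// is skipped (return 1) otherwise.
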
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize,
                                     int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  int rate = 0;
  int64_t distortion;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  uint8_t *src_init = raster_block_offset_uint8(xd, BLOCK_8X8, 0, ib,
                                                p->src.buf, src_stride);
  uint8_t *dst_init = raster_block_offset_uint8(xd, BLOCK_8X8, 0, ib,
                                                pd->dst.buf, dst_stride);
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  TX_TYPE tx_type = DCT_DCT;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy, block;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    rate = bmode_costs[mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        int64_t ssz;
        const int16_t *scan;
        uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
        uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;

        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode = mode;
        src_diff = raster_block_offset_int16(xd, BLOCK_8X8, 0, block,
                                             p->src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, dst_stride);

        tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }

        scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
        ratey += cost_coeffs(x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, scan,
                             vp9_get_coef_neighbors_handle(scan));
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                      16, &ssz) >> 2;
        if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
          goto next;

        if (tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                               dst, pd->dst.stride, tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                             dst, pd->dst.stride);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}

static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
                                            MACROBLOCK * const mb,
                                            int *rate,
                                            int *rate_y,
                                            int64_t * const distortion,
                                            int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;
  MODE_INFO *const mic = xd->mode_info_context;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      const int mis = xd->mode_info_stride;
      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int64_t UNINITIALIZED_IS_SAFE(d), this_rd;
      i = idy * 2 + idx;

      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

        bmode_costs = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE_TYPE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  MACROBLOCKD *const xd = &x->e_mbd;
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
      const MB_PREDICTION_MODE L = xd->left_available ?
                                   left_block_mode(mic, 0) : DC_PRED;

      bmode_costs = x->y_mode_costs[A][L];
    }
    x->e_mbd.mode_info_context->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
                               local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
  x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;

  return best_rd;
}

static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (is_inter_block(mbmi))
    vp9_subtract_sbuv(x, bsize);

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(cm, x, &pnrate, &pndist, &pnskip, &pnsse,
                     INT64_MAX, plane, bsize, uv_txfm_size);
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  MB_PREDICTION_MODE last_mode = bsize <= BLOCK_8X8 ?
                                 TM_PRED : cpi->sf.last_chroma_intra_mode;

  for (mode = DC_PRED; mode <= last_mode; mode++) {
    x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
    super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
                     &this_distortion, &s, &this_sse, bsize);
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;

  return best_rd;
}

static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE_TYPE bsize) {
  int64_t this_rd;
  int64_t this_sse;

  x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
  super_block_uvrd(&cpi->common, x, rate_tokenonly,
                   distortion, skippable, &this_sse, bsize);
  *rate = *rate_tokenonly +
          x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
  this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);

  return this_rd;
}

static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 MB_PREDICTION_MODE *mode_uv) {
  MACROBLOCK *const x = &cpi->mb;

  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                   bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  }
  *mode_uv = x->e_mbd.mode_info_context->mbmi.uv_mode;
}

static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                       int mode_context) {
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int segment_id = xd->mode_info_context->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
  if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
    assert(is_inter_mode(mode));
    return x->inter_mode_cost[mode_context][mode - NEARESTMV];
  } else {
    return 0;
  }
}

void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  x->e_mbd.mode_info_context->mbmi.mode = mb;
  x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE_TYPE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE_TYPE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv);

static int labels2mode(MACROBLOCK *x, int i,
                       MB_PREDICTION_MODE this_mode,
                       int_mv *this_mv, int_mv *this_second_mv,
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv,
                       int_mv *second_best_ref_mv,
                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mode_info_context;
  MB_MODE_INFO * mbmi = &mic->mbmi;
  int cost = 0, thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];

  /* We have to be careful retrieving previously-encoded motion vectors.
     Ones from this macroblock have to be pulled from the BLOCKD array
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
  MB_PREDICTION_MODE m;

  // the only time we should do costing for new motion vector or mode
  // is when we are on a new label  (jbb May 08, 2007)
  switch (m = this_mode) {
    case NEWMV:
      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
                                   102);
      if (mbmi->ref_frame[1] > 0) {
        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
                                      mvjcost, mvcost, 102);
      }
      break;
    case NEARESTMV:
      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int =
            frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int =
            frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv->as_int = 0;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int = 0;
      break;
    default:
      break;
  }

  cost = cost_mv_ref(cpi, this_mode,
                     mbmi->mode_context[mbmi->ref_frame[0]]);

  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
  if (mbmi->ref_frame[1] > 0)
    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;

  x->partition_info->bmi[i].mode = m;
  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));

  cost += thismvcost;
  return cost;
}

static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  MODE_INFO *const mi = xd->mode_info_context;
  const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  int idx, idy;
  const int src_stride = x->plane[0].src.stride;
  uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_8X8, 0, i,
                                                 x->plane[0].src.buf,
                                                 src_stride);
  int16_t* src_diff = raster_block_offset_int16(xd, BLOCK_8X8, 0, i,
                                                x->plane[0].src_diff);
  int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, i);
  uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_8X8, 0, i,
                                                 pd->pre[0].buf,
                                                 pd->pre[0].stride);
  uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_8X8, 0, i,
                                                 pd->dst.buf,
                                                 pd->dst.stride);
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0;

  vp9_build_inter_predictor(pre, pd->pre[0].stride,
                            dst, pd->dst.stride,
                            &mi->bmi[i].as_mv[0].as_mv,
                            &xd->scale_factor[0],
                            width, height, 0, &xd->subpix, MV_PRECISION_Q3);

  if (mi->mbmi.ref_frame[1] > 0) {
    uint8_t* const second_pre =
        raster_block_offset_uint8(xd, BLOCK_8X8, 0, i,
                                  pd->pre[1].buf, pd->pre[1].stride);
    vp9_build_inter_predictor(second_pre, pd->pre[1].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[1].as_mv,
                              &xd->scale_factor[1],
                              width, height, 1, &xd->subpix, MV_PRECISION_Q3);
  }

  vp9_subtract_block(height, width, src_diff, 8, src, src_stride,
                     dst, pd->dst.stride);

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;

      k += (idy * 2 + idx);
      src_diff = raster_block_offset_int16(xd, BLOCK_8X8, 0, k,
                                           x->plane[0].src_diff);
      coeff = BLOCK_OFFSET(x->plane[0].coeff, k);
      x->fwd_txm4x4(src_diff, coeff, 16);
      x->quantize_b_4x4(x, k, DCT_DCT, 16);
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, PLANE_TYPE_Y_WITH_DC,
                              ta + (k & 1),
                              tl + (k >> 1), TX_4X4,
                              vp9_default_scan_4x4,
                              vp9_default_scan_4x4_neighbors);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv, *second_ref_mv;
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][VP9_INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
  int r = 0;
  r |= (mv->as_mv.row >> 3) < x->mv_row_min;
  r |= (mv->as_mv.row >> 3) > x->mv_row_max;
  r |= (mv->as_mv.col >> 3) < x->mv_col_min;
  r |= (mv->as_mv.col >> 3) > x->mv_col_max;
  return r;
}

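// Editorial note: motion vectors are stored in 1/8-pel units, so the >> 3
// above converts to full pels before comparing against the UMV border
// limits; a nonzero return flags an out-of-range vector.
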
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
  x->plane[0].src.buf =
      raster_block_offset_uint8(&x->e_mbd, BLOCK_8X8, 0, i,
                                x->plane[0].src.buf,
                                x->plane[0].src.stride);
  assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
  x->e_mbd.plane[0].pre[0].buf =
      raster_block_offset_uint8(&x->e_mbd, BLOCK_8X8, 0, i,
                                x->e_mbd.plane[0].pre[0].buf,
                                x->e_mbd.plane[0].pre[0].stride);
  if (mbmi->ref_frame[1])
    x->e_mbd.plane[0].pre[1].buf =
        raster_block_offset_uint8(&x->e_mbd, BLOCK_8X8, 0, i,
                                  x->e_mbd.plane[0].pre[1].buf,
                                  x->e_mbd.plane[0].pre[1].stride);
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (mbmi->ref_frame[1])
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    BEST_SEG_INFO *bsi_buf, int filter_idx,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int i, j, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  MB_PREDICTION_MODE this_mode;
  MODE_INFO *mi = x->e_mbd.mode_info_context;
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  vp9_variance_fn_ptr_t *v_fn_ptr;
  ENTROPY_CONTEXT t_above[2], t_left[2];
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  int mode_idx;
  int subpelmv = 1, have_ref = 0;

  vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));

  v_fn_ptr = &cpi->fn_ptr[bsize];

  // 64 makes this threshold really big effectively
  // making it so that we very rarely check mvs on
  // segments. Setting this to 1 would make mv thresh
  // roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      MB_PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;
      i = idy * 2 + idx;

      frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
      frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
      vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
                                    &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
                                    &frame_mv[NEARMV][mbmi->ref_frame[0]],
                                    i, 0, mi_row, mi_col);
      if (mbmi->ref_frame[1] > 0)
        vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
                                      &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
                                      &frame_mv[NEARMV][mbmi->ref_frame[1]],
                                      i, 1, mi_row, mi_col);

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = inter_mode_offset(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;

        // if we're near/nearest and mv == 0,0, compare to zeromv
        if ((this_mode == NEARMV || this_mode == NEARESTMV ||
             this_mode == ZEROMV) &&
            frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
            (mbmi->ref_frame[1] <= 0 ||
             frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
          int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
          int c1 = cost_mv_ref(cpi, NEARMV, rfc);
          int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
          int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

          if (this_mode == NEARMV) {
            if (c1 > c3)
              continue;
          } else if (this_mode == NEARESTMV) {
            if (c2 > c3)
              continue;
          } else {
            assert(this_mode == ZEROMV);
            if (mbmi->ref_frame[1] <= 0) {
              if ((c3 >= c2 &&
                   frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
                  (c3 >= c1 &&
                   frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
                continue;
            } else {
              if ((c3 >= c2 &&
                   frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
                   frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
                  (c3 >= c1 &&
                   frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
                   frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
                continue;
            }
          }
        }

        vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
        vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
                   sizeof(bsi->rdstat[i][mode_idx].ta));
        vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
                   sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          int step_param = 0;
          int further_steps;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          int_mv mvp_full;
          int max_mv;

          /* Is the best so far sufficiently good that we can't justify
           * doing a new motion search. */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->compressor_speed) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int =
                  x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int =
                    x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
          else
            max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
          if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
            // Take a weighted average of the step_params based on the last
            // frame's max mv magnitude and the best ref mvs of the current
            // block for the given reference.
            step_param = (vp9_init_search_range(cpi, max_mv) +
                          cpi->mv_step_param) >> 1;
          } else {
            step_param = cpi->mv_step_param;
          }

          further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;

          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;

          // adjust src pointer for this block
          mi_buf_shift(x, i);
          if (cpi->sf.search_method == HEX) {
            bestsme = vp9_hex_search(x, &mvp_full,
                                     step_param,
                                     sadpb, 1, v_fn_ptr, 1,
                                     bsi->ref_mv, &mode_mv[NEWMV]);
          } else if (cpi->sf.search_method == SQUARE) {
            bestsme = vp9_square_search(x, &mvp_full,
                                        step_param,
                                        sadpb, 1, v_fn_ptr, 1,
                                        bsi->ref_mv, &mode_mv[NEWMV]);
          } else if (cpi->sf.search_method == BIGDIA) {
            bestsme = vp9_bigdia_search(x, &mvp_full,
                                        step_param,
                                        sadpb, 1, v_fn_ptr, 1,
                                        bsi->ref_mv, &mode_mv[NEWMV]);
          } else {
            bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                             sadpb, further_steps, 0, v_fn_ptr,
                                             bsi->ref_mv, &mode_mv[NEWMV]);
          }

          // Should we do a full search (best quality only)
          if (cpi->compressor_speed == 0) {
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);

            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, v_fn_ptr,
                                           x->nmvjointcost, x->mvcost,
                                           bsi->ref_mv, i);

            if (thissme < bestsme) {
              bestsme = thissme;
              mode_mv[NEWMV].as_int =
                  x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
            } else {
              /* The full search result is actually worse so re-instate the
               * previous best vector */
              x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
                  mode_mv[NEWMV].as_int;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            unsigned int sse;
            cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
                                         bsi->ref_mv, x->errorperbit, v_fn_ptr,
                                         0, cpi->sf.subpel_iters_per_step,
                                         x->nmvjointcost, x->mvcost,
                                         &distortion, &sse);

            // save motion search result for use in compound prediction
            seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
          }

          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

1850 if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV &&
1851 mbmi->interp_filter == EIGHTTAP) {
1852 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1853 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1856 // adjust src pointers
1858 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1860 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1861 mi_row, mi_col, seg_mvs[i],
1863 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1864 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1865 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1866 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1868 // restore src pointers
1869 mi_buf_restore(x, orig_src, orig_pre);
1872 bsi->rdstat[i][mode_idx].brate =
1873 labels2mode(x, i, this_mode, &mode_mv[this_mode],
1874 &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1875 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1878 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
1879 if (num_4x4_blocks_wide > 1)
1880 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
1881 mode_mv[this_mode].as_int;
1882 if (num_4x4_blocks_high > 1)
1883 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
1884 mode_mv[this_mode].as_int;
1885 if (mbmi->ref_frame[1] > 0) {
1886 bsi->rdstat[i][mode_idx].mvs[1].as_int =
1887 second_mode_mv[this_mode].as_int;
1888 if (num_4x4_blocks_wide > 1)
1889 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
1890 second_mode_mv[this_mode].as_int;
1891 if (num_4x4_blocks_high > 1)
1892 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
1893 second_mode_mv[this_mode].as_int;
1896 // Trap vectors that reach beyond the UMV borders
1897 if (mv_check_bounds(x, &mode_mv[this_mode]))
1899 if (mbmi->ref_frame[1] > 0 &&
1900 mv_check_bounds(x, &second_mode_mv[this_mode]))
1903 if (filter_idx > 0) {
1904 BEST_SEG_INFO *ref_bsi = bsi_buf;
1905 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) ||
1906 (mode_mv[this_mode].as_mv.col & 0x0f);
1907 have_ref = mode_mv[this_mode].as_int ==
1908 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1909 if (mbmi->ref_frame[1] > 0) {
1910 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) ||
1911 (second_mode_mv[this_mode].as_mv.col & 0x0f);
1912 have_ref &= second_mode_mv[this_mode].as_int ==
1913 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1916 if (filter_idx > 1 && !subpelmv && !have_ref) {
1917 ref_bsi = bsi_buf + 1;
1918 have_ref = mode_mv[this_mode].as_int ==
1919 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1920 if (mbmi->ref_frame[1] > 0) {
1921 have_ref &= second_mode_mv[this_mode].as_int ==
1922 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1926 if (!subpelmv && have_ref &&
1927 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1928 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1929 sizeof(SEG_RDSTAT));
1930 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1931 mode_selected = this_mode;
1932 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1938 bsi->rdstat[i][mode_idx].brdcost =
1939 encode_inter_mb_segment(cpi, x,
1940 bsi->segment_rd - this_segment_rd, i,
1941 &bsi->rdstat[i][mode_idx].byrate,
1942 &bsi->rdstat[i][mode_idx].bdist,
1943 &bsi->rdstat[i][mode_idx].bsse,
1944 bsi->rdstat[i][mode_idx].ta,
1945 bsi->rdstat[i][mode_idx].tl);
1946 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1947 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1948 bsi->rdstat[i][mode_idx].brate, 0);
1949 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1950 bsi->rdstat[i][mode_idx].eobs = x->e_mbd.plane[0].eobs[i];
1953 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1954 mode_selected = this_mode;
1955 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1957 } /* for each 4x4 mode */
1959 if (best_rd == INT64_MAX) {
1961 for (iy = i + 1; iy < 4; ++iy)
1962 for (midx = 0; midx < VP9_INTER_MODES; ++midx)
1963 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1964 bsi->segment_rd = INT64_MAX;
1968 mode_idx = inter_mode_offset(mode_selected);
1969 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
1970 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
1972 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
1973 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
1974 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1977 br += bsi->rdstat[i][mode_idx].brate;
1978 bd += bsi->rdstat[i][mode_idx].bdist;
1979 block_sse += bsi->rdstat[i][mode_idx].bsse;
1980 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
1981 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
1983 if (this_segment_rd > bsi->segment_rd) {
1985 for (iy = i + 1; iy < 4; ++iy)
1986 for (midx = 0; midx < VP9_INTER_MODES; ++midx)
1987 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1988 bsi->segment_rd = INT64_MAX;
1992 for (j = 1; j < num_4x4_blocks_high; ++j)
1993 vpx_memcpy(&x->partition_info->bmi[i + j * 2],
1994 &x->partition_info->bmi[i],
1995 sizeof(x->partition_info->bmi[i]));
1996 for (j = 1; j < num_4x4_blocks_wide; ++j)
1997 vpx_memcpy(&x->partition_info->bmi[i + j],
1998 &x->partition_info->bmi[i],
1999 sizeof(x->partition_info->bmi[i]));
2001 } /* for each label */
2005 bsi->segment_yrate = segmentyrate;
2006 bsi->segment_rd = this_segment_rd;
2007 bsi->sse = block_sse;
2009 // update the coding decisions
2010 for (i = 0; i < 4; ++i)
2011 bsi->modes[i] = x->partition_info->bmi[i].mode;
2014 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2015 int_mv *best_ref_mv,
2016 int_mv *second_best_ref_mv,
2020 int64_t *returndistortion,
2021 int *skippable, int64_t *psse,
2023 int_mv seg_mvs[4][MAX_REF_FRAMES],
2024 BEST_SEG_INFO *bsi_buf,
2026 int mi_row, int mi_col) {
2028 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2029 MACROBLOCKD *xd = &x->e_mbd;
2030 MODE_INFO *mi = xd->mode_info_context;
2031 MB_MODE_INFO *mbmi = &mi->mbmi;
2036 bsi->segment_rd = best_rd;
2037 bsi->ref_mv = best_ref_mv;
2038 bsi->second_ref_mv = second_best_ref_mv;
2039 bsi->mvp.as_int = best_ref_mv->as_int;
2040 bsi->mvthresh = mvthresh;
2042 for (i = 0; i < 4; i++)
2043 bsi->modes[i] = ZEROMV;
2045 rd_check_segment_txsize(cpi, x, bsi_buf, filter_idx, seg_mvs, mi_row, mi_col);
2047 if (bsi->segment_rd > best_rd)
2049 /* set it to the best */
2050 for (i = 0; i < 4; i++) {
2051 mode_idx = inter_mode_offset(bsi->modes[i]);
2052 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2053 if (mbmi->ref_frame[1] > 0)
2054 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2055 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2056 x->partition_info->bmi[i].mode = bsi->modes[i];
2060 * used to set mbmi->mv.as_int
2062 *returntotrate = bsi->r;
2063 *returndistortion = bsi->d;
2064 *returnyrate = bsi->segment_yrate;
2065 *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
2067 mbmi->mode = bsi->modes[3];
2069 return bsi->segment_rd;
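// Scan the candidate reference MVs for the given reference frame,
// measure the full-pel SAD of the block each candidate points at, and
// record both the index of the best candidate and the largest MV
// magnitude seen. These are used later to centre and size the motion
// searches for this reference.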
2072 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2073 uint8_t *ref_y_buffer, int ref_y_stride,
2074 int ref_frame, BLOCK_SIZE_TYPE block_size ) {
2075 MACROBLOCKD *xd = &x->e_mbd;
2076 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2081 int best_sad = INT_MAX;
2082 int this_sad = INT_MAX;
2083 unsigned int max_mv = 0;
2085 uint8_t *src_y_ptr = x->plane[0].src.buf;
2087 int row_offset, col_offset;
2089 // Get the sad for each candidate reference mv
2090 for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
2091 this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
2093 max_mv = MAX(max_mv,
2094 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2095 // The list is at an end if we see 0 for a second time.
2096 if (!this_mv.as_int && zero_seen)
2098 zero_seen = zero_seen || !this_mv.as_int;
2100 row_offset = this_mv.as_mv.row >> 3;
2101 col_offset = this_mv.as_mv.col >> 3;
2102 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2104 // Find sad for current vector.
2105 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2106 ref_y_ptr, ref_y_stride,
2109 // Note if it is the best so far.
2110 if (this_sad < best_sad) {
2111 best_sad = this_sad;
2116 // Note the index of the mv that worked best in the reference list.
2117 x->mv_best_ref_index[ref_frame] = best_index;
2118 x->max_mv_context[ref_frame] = max_mv;
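// Estimate the per-reference-frame signalling costs for single and
// compound prediction from the current probability context. When the
// segment-level reference frame feature is active no reference frame
// bits are coded, so the costs are simply zeroed.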
2121 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2122 unsigned int *ref_costs_single,
2123 unsigned int *ref_costs_comp,
2124 vp9_prob *comp_mode_p) {
2125 VP9_COMMON *const cm = &cpi->common;
2126 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2127 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2129 if (seg_ref_active) {
2130 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2131 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2134 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
2135 vp9_prob comp_inter_p = 128;
2137 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
2138 comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
2139 *comp_mode_p = comp_inter_p;
2144 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2146 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
2147 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2148 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2149 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2151 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2152 base_cost += vp9_cost_bit(comp_inter_p, 0);
2154 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2155 ref_costs_single[ALTREF_FRAME] = base_cost;
2156 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2157 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2158 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2159 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2160 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2162 ref_costs_single[LAST_FRAME] = 512;
2163 ref_costs_single[GOLDEN_FRAME] = 512;
2164 ref_costs_single[ALTREF_FRAME] = 512;
2166 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
2167 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2168 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2170 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2171 base_cost += vp9_cost_bit(comp_inter_p, 1);
2173 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2174 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2176 ref_costs_comp[LAST_FRAME] = 512;
2177 ref_costs_comp[GOLDEN_FRAME] = 512;
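// Note: the 512 fall-back costs above are in the encoder's
// probability-cost units; assuming the usual 256-units-per-bit scale of
// the vp9_cost_bit() tables, 512 corresponds to roughly two bits.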
2182 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2184 PARTITION_INFO *partition,
2186 int_mv *second_ref_mv,
2187 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
2188 int64_t tx_size_diff[TX_MODES],
2189 int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) {
2190 MACROBLOCKD *const xd = &x->e_mbd;
2192 // Take a snapshot of the coding context so it can be
2193 // restored if we decide to encode this way
2194 ctx->skip = x->skip;
2195 ctx->best_mode_index = mode_index;
2196 ctx->mic = *xd->mode_info_context;
2199 ctx->partition_info = *partition;
2201 ctx->best_ref_mv.as_int = ref_mv->as_int;
2202 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
2204 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
2205 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
2206 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
2208 // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
2209 // doesn't actually work this way
2210 memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2211 memcpy(ctx->best_filter_diff, best_filter_diff,
2212 sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1));
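// Point the per-plane prediction buffers at the given source frame and
// configure each plane for the block at (mi_row, mi_col), using the UV
// scale factors for the chroma planes.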
2215 static void setup_pred_block(const MACROBLOCKD *xd,
2216 struct buf_2d dst[MAX_MB_PLANE],
2217 const YV12_BUFFER_CONFIG *src,
2218 int mi_row, int mi_col,
2219 const struct scale_factors *scale,
2220 const struct scale_factors *scale_uv) {
2223 dst[0].buf = src->y_buffer;
2224 dst[0].stride = src->y_stride;
2225 dst[1].buf = src->u_buffer;
2226 dst[2].buf = src->v_buffer;
2227 dst[1].stride = dst[2].stride = src->uv_stride;
2229 dst[3].buf = src->alpha_buffer;
2230 dst[3].stride = src->alpha_stride;
2233 // TODO(jkoleszar): Make scale factors per-plane data
2234 for (i = 0; i < MAX_MB_PLANE; i++) {
2235 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2236 i ? scale_uv : scale,
2237 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
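// Prepare everything needed to predict from one reference frame: scale
// factors and offsets, per-plane prediction pointers, the ordered list
// of candidate MVs (with encoder-side refinement), and, when the
// reference is unscaled, the mv_pred() search-centre estimate.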
2241 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2242 int idx, MV_REFERENCE_FRAME frame_type,
2243 BLOCK_SIZE_TYPE block_size,
2244 int mi_row, int mi_col,
2245 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2246 int_mv frame_near_mv[MAX_REF_FRAMES],
2247 struct buf_2d yv12_mb[4][MAX_MB_PLANE],
2248 struct scale_factors scale[MAX_REF_FRAMES]) {
2249 VP9_COMMON *cm = &cpi->common;
2250 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
2251 MACROBLOCKD *const xd = &x->e_mbd;
2252 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2254 // set up scaling factors
2255 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
2257 scale[frame_type].x_offset_q4 =
2258 ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
2259 VP9_REF_SCALE_SHIFT) & 0xf;
2260 scale[frame_type].y_offset_q4 =
2261 ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
2262 VP9_REF_SCALE_SHIFT) & 0xf;
2264 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2265 // use the UV scaling factors.
2266 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
2267 &scale[frame_type], &scale[frame_type]);
2269 // Gets an initial list of candidate vectors from neighbours and orders them
2270 vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
2271 xd->prev_mode_info_context,
2273 mbmi->ref_mvs[frame_type],
2274 cpi->common.ref_frame_sign_bias, mi_row, mi_col);
2276 // Candidate refinement carried out at encoder and decoder
2277 vp9_find_best_ref_mvs(xd,
2278 mbmi->ref_mvs[frame_type],
2279 &frame_nearest_mv[frame_type],
2280 &frame_near_mv[frame_type]);
2282 // Further refinement that is encode side only to test the top few candidates
2283 // in full and choose the best as the centre point for subsequent searches.
2284 // The current implementation doesn't support scaling.
2285 if (scale[frame_type].x_scale_fp == VP9_REF_NO_SCALE &&
2286 scale[frame_type].y_scale_fp == VP9_REF_NO_SCALE)
2287 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
2288 frame_type, block_size);
2291 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
2292 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
2293 int fb = get_ref_frame_idx(cpi, ref_frame);
2294 if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb])
2295 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]];
2296 return scaled_ref_frame;
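// Rate cost of signalling the currently selected switchable
// interpolation filter, conditioned on the spatial prediction context
// and scaled by SWITCHABLE_INTERP_RATE_FACTOR.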
2299 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2300 const MACROBLOCKD *const xd = &x->e_mbd;
2301 const MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2302 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2303 return SWITCHABLE_INTERP_RATE_FACTOR *
2304 x->switchable_interp_costs[ctx][mbmi->interp_filter];
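// Full-pel motion search for a single reference frame (hex, square,
// big-diamond or the default diamond pattern, per the active speed
// features) followed by sub-pel refinement. If the reference differs in
// resolution from the current frame, a pre-scaled copy is swapped in so
// the search code needs no modification.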
2307 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2308 BLOCK_SIZE_TYPE bsize,
2309 int mi_row, int mi_col,
2310 int_mv *tmp_mv, int *rate_mv) {
2311 MACROBLOCKD *xd = &x->e_mbd;
2312 VP9_COMMON *cm = &cpi->common;
2313 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2314 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2315 int bestsme = INT_MAX;
2316 int further_steps, step_param;
2317 int sadpb = x->sadperbit16;
2319 int ref = mbmi->ref_frame[0];
2320 int_mv ref_mv = mbmi->ref_mvs[ref][0];
2321 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2323 int tmp_col_min = x->mv_col_min;
2324 int tmp_col_max = x->mv_col_max;
2325 int tmp_row_min = x->mv_row_min;
2326 int tmp_row_max = x->mv_row_max;
2328 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
2330 if (scaled_ref_frame) {
2332 // Swap out the reference frame for a version that's been scaled to
2333 // match the resolution of the current frame, allowing the existing
2334 // motion search code to be used without additional modifications.
2335 for (i = 0; i < MAX_MB_PLANE; i++)
2336 backup_yv12[i] = xd->plane[i].pre[0];
2338 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2341 vp9_clamp_mv_min_max(x, &ref_mv.as_mv);
2343 // Adjust search parameters based on small partitions' result.
2345 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
2346 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
2347 // adjust search range
2352 // Get prediction MV.
2353 mvp_full.as_int = x->pred_mv.as_int;
2355 // Adjust MV sign if needed.
2356 if (cm->ref_frame_sign_bias[ref]) {
2357 mvp_full.as_mv.col *= -1;
2358 mvp_full.as_mv.row *= -1;
2361 // Work out the size of the first step in the mv step search.
2362 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2363 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2364 // Take a weighted average of the step_params based on the last frame's
2365 // max mv magnitude and that based on the best ref mvs of the current
2366 // block for the given reference.
2367 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2368 cpi->mv_step_param) >> 1;
2370 step_param = cpi->mv_step_param;
2372 // mvp_full.as_int = ref_mv[0].as_int;
2374 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
2377 mvp_full.as_mv.col >>= 3;
2378 mvp_full.as_mv.row >>= 3;
2380 // Further step/diamond searches as necessary
2381 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2383 if (cpi->sf.search_method == HEX) {
2384 bestsme = vp9_hex_search(x, &mvp_full,
2387 &cpi->fn_ptr[block_size], 1,
2389 } else if (cpi->sf.search_method == SQUARE) {
2390 bestsme = vp9_square_search(x, &mvp_full,
2393 &cpi->fn_ptr[block_size], 1,
2395 } else if (cpi->sf.search_method == BIGDIA) {
2396 bestsme = vp9_bigdia_search(x, &mvp_full,
2399 &cpi->fn_ptr[block_size], 1,
2402 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2403 sadpb, further_steps, 1,
2404 &cpi->fn_ptr[block_size],
2408 x->mv_col_min = tmp_col_min;
2409 x->mv_col_max = tmp_col_max;
2410 x->mv_row_min = tmp_row_min;
2411 x->mv_row_max = tmp_row_max;
2413 if (bestsme < INT_MAX) {
2414 int dis; /* TODO: use dis in distortion calculation later. */
2416 cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
2418 &cpi->fn_ptr[block_size],
2419 0, cpi->sf.subpel_iters_per_step,
2420 x->nmvjointcost, x->mvcost,
2423 *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
2424 x->nmvjointcost, x->mvcost,
2426 if (scaled_ref_frame) {
2428 for (i = 0; i < MAX_MB_PLANE; i++)
2429 xd->plane[i].pre[0] = backup_yv12[i];
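// Iteratively refine a compound prediction: on each pass one reference's
// MV is held fixed, its prediction is built into second_pred, and the
// other reference's MV is refined against the combined predictor using
// a small-range full-pel search plus a sub-pel step. A new MV is kept
// only if it reduces the error for that reference.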
2433 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2434 BLOCK_SIZE_TYPE bsize,
2436 int mi_row, int mi_col,
2437 int_mv single_newmv[MAX_REF_FRAMES],
2439 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
2440 MACROBLOCKD *xd = &x->e_mbd;
2441 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2442 int refs[2] = { mbmi->ref_frame[0],
2443 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2445 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2447 // Prediction buffer from second frame.
2448 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2450 // Do joint motion search in compound mode to get more accurate mv.
2451 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2452 struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
2453 struct buf_2d scaled_first_yv12;
2454 int last_besterr[2] = {INT_MAX, INT_MAX};
2455 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
2456 scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
2457 scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]);
2459 ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
2460 ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
2462 if (scaled_ref_frame[0]) {
2464 // Swap out the reference frame for a version that's been scaled to
2465 // match the resolution of the current frame, allowing the existing
2466 // motion search code to be used without additional modifications.
2467 for (i = 0; i < MAX_MB_PLANE; i++)
2468 backup_yv12[i] = xd->plane[i].pre[0];
2469 setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL);
2472 if (scaled_ref_frame[1]) {
2474 for (i = 0; i < MAX_MB_PLANE; i++)
2475 backup_second_yv12[i] = xd->plane[i].pre[1];
2477 setup_pre_planes(xd, 0, scaled_ref_frame[1], mi_row, mi_col, NULL);
2480 xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
2482 xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
2484 scaled_first_yv12 = xd->plane[0].pre[0];
2486 // Initialize mv using single prediction mode result.
2487 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2488 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2490 // Run the joint search iteratively, alternating between the two ref
2491 // frames, and break out of the search loop if no better mv is found.
2492 for (ite = 0; ite < 4; ite++) {
2493 struct buf_2d ref_yv12[2];
2494 int bestsme = INT_MAX;
2495 int sadpb = x->sadperbit16;
2497 int search_range = 3;
2499 int tmp_col_min = x->mv_col_min;
2500 int tmp_col_max = x->mv_col_max;
2501 int tmp_row_min = x->mv_row_min;
2502 int tmp_row_max = x->mv_row_max;
2505 // Initialized here because of compiler problem in Visual Studio.
2506 ref_yv12[0] = xd->plane[0].pre[0];
2507 ref_yv12[1] = xd->plane[0].pre[1];
2509 // Get pred block from second frame.
2510 vp9_build_inter_predictor(ref_yv12[!id].buf,
2511 ref_yv12[!id].stride,
2513 &frame_mv[refs[!id]].as_mv,
2514 &xd->scale_factor[!id],
2516 &xd->subpix, MV_PRECISION_Q3);
2518 // Compound motion search on first ref frame.
2520 xd->plane[0].pre[0] = ref_yv12[id];
2521 vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv);
2523 // Use mv result from single mode as mvp.
2524 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2526 tmp_mv.as_mv.col >>= 3;
2527 tmp_mv.as_mv.row >>= 3;
2529 // Small-range full-pixel motion search
2530 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2532 &cpi->fn_ptr[block_size],
2533 x->nmvjointcost, x->mvcost,
2534 &ref_mv[id], second_pred,
2537 x->mv_col_min = tmp_col_min;
2538 x->mv_col_max = tmp_col_max;
2539 x->mv_row_min = tmp_row_min;
2540 x->mv_row_max = tmp_row_max;
2542 if (bestsme < INT_MAX) {
2543 int dis; /* TODO: use dis in distortion calculation later. */
2546 bestsme = cpi->find_fractional_mv_step_comp(
2550 &cpi->fn_ptr[block_size],
2551 0, cpi->sf.subpel_iters_per_step,
2552 x->nmvjointcost, x->mvcost,
2553 &dis, &sse, second_pred,
2558 xd->plane[0].pre[0] = scaled_first_yv12;
2560 if (bestsme < last_besterr[id]) {
2561 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2562 last_besterr[id] = bestsme;
2568 // restore the predictor
2569 if (scaled_ref_frame[0]) {
2571 for (i = 0; i < MAX_MB_PLANE; i++)
2572 xd->plane[i].pre[0] = backup_yv12[i];
2575 if (scaled_ref_frame[1]) {
2577 for (i = 0; i < MAX_MB_PLANE; i++)
2578 xd->plane[i].pre[1] = backup_second_yv12[i];
2580 *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2581 &mbmi->ref_mvs[refs[0]][0],
2582 x->nmvjointcost, x->mvcost, 96);
2583 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2584 &mbmi->ref_mvs[refs[1]][0],
2585 x->nmvjointcost, x->mvcost, 96);
2587 vpx_free(second_pred);
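// Compute the rate-distortion cost of a single inter mode: motion search
// (single or joint) for NEWMV, early rejection of redundant
// near/nearest/zero MVs, interpolation filter selection driven by a
// modelled rd cost, an optional encode_breakout skip test, and finally
// full Y and UV rate-distortion measurement.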
2590 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2591 BLOCK_SIZE_TYPE bsize,
2592 int64_t txfm_cache[],
2593 int *rate2, int64_t *distortion,
2595 int *rate_y, int64_t *distortion_y,
2596 int *rate_uv, int64_t *distortion_uv,
2597 int *mode_excluded, int *disable_skip,
2598 INTERPOLATIONFILTERTYPE *best_filter,
2599 int_mv (*mode_mv)[MAX_REF_FRAMES],
2600 int mi_row, int mi_col,
2601 int_mv single_newmv[MAX_REF_FRAMES],
2602 int64_t *psse, int64_t ref_best_rd) {
2603 VP9_COMMON *cm = &cpi->common;
2604 MACROBLOCKD *xd = &x->e_mbd;
2605 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2606 const int is_comp_pred = (mbmi->ref_frame[1] > 0);
2607 const int num_refs = is_comp_pred ? 2 : 1;
2608 const int this_mode = mbmi->mode;
2609 int_mv *frame_mv = mode_mv[this_mode];
2611 int refs[2] = { mbmi->ref_frame[0],
2612 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2614 int64_t this_rd = 0;
2615 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2616 int pred_exists = 0;
2617 int interpolating_intpel_seen = 0;
2619 int64_t rd, best_rd = INT64_MAX;
2620 int best_needs_copy = 0;
2621 uint8_t *orig_dst[MAX_MB_PLANE];
2622 int orig_dst_stride[MAX_MB_PLANE];
2625 if (this_mode == NEWMV) {
2628 // Initialize mv using single prediction mode result.
2629 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2630 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2632 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2633 joint_motion_search(cpi, x, bsize, frame_mv,
2634 mi_row, mi_col, single_newmv, &rate_mv);
2636 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2637 &mbmi->ref_mvs[refs[0]][0],
2638 x->nmvjointcost, x->mvcost, 96);
2639 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2640 &mbmi->ref_mvs[refs[1]][0],
2641 x->nmvjointcost, x->mvcost, 96);
2643 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2644 frame_mv[refs[1]].as_int == INVALID_MV)
2649 single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
2651 frame_mv[refs[0]].as_int =
2652 xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2653 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2657 // if we're near/nearest and mv == 0,0, compare to zeromv
2658 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2659 frame_mv[refs[0]].as_int == 0 &&
2660 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2661 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
2662 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
2663 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2664 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2665 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2667 if (this_mode == NEARMV) {
2670 } else if (this_mode == NEARESTMV) {
2674 assert(this_mode == ZEROMV);
2675 if (num_refs == 1) {
2677 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
2679 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
2683 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
2684 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
2686 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2687 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
2693 for (i = 0; i < num_refs; ++i) {
2694 cur_mv[i] = frame_mv[refs[i]];
2695 // Clip "next_nearest" so that it does not extend too far out of the image
2696 if (this_mode != NEWMV)
2697 clamp_mv2(&cur_mv[i].as_mv, xd);
2699 if (mv_check_bounds(x, &cur_mv[i]))
2701 mbmi->mv[i].as_int = cur_mv[i].as_int;
2704 // do first prediction into the destination buffer. Do the next
2705 // prediction into a temporary buffer. Then keep track of which one
2706 // of these currently holds the best predictor, and use the other
2707 // one for future predictions. In the end, copy from tmp_buf to
2708 // dst if necessary.
2709 for (i = 0; i < MAX_MB_PLANE; i++) {
2710 orig_dst[i] = xd->plane[i].dst.buf;
2711 orig_dst_stride[i] = xd->plane[i].dst.stride;
2714 /* We don't include the cost of the second reference here, because there
2715 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2716 * words if you present them in that order, the second one is always known
2717 * if the first is known */
2718 *rate2 += cost_mv_ref(cpi, this_mode,
2719 mbmi->mode_context[mbmi->ref_frame[0]]);
2721 if (!(*mode_excluded)) {
2723 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2725 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2730 interpolating_intpel_seen = 0;
2731 // Are all MVs integer pel for Y and UV
2732 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2733 (mbmi->mv[0].as_mv.col & 15) == 0;
2735 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2736 (mbmi->mv[1].as_mv.col & 15) == 0;
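// Masking with 15 rather than 7 here means "integer pel" is required at
// chroma precision as well: with 4:2:0 subsampling the chroma MV is the
// luma MV halved, so a 1/8-pel luma MV whose low four bits are zero
// lands on integer positions in both Y and UV.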
2737 // Search for best switchable filter by checking the variance of
2738 // pred error irrespective of whether the filter will be used
2739 *best_filter = EIGHTTAP;
2740 if (cpi->sf.use_8tap_always) {
2741 *best_filter = EIGHTTAP;
2742 vp9_zero(cpi->rd_filter_cache);
2745 int tmp_rate_sum = 0;
2746 int64_t tmp_dist_sum = 0;
2748 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
2749 for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
2752 const int is_intpel_interp = intpel_mv;
2753 mbmi->interp_filter = i;
2754 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2755 rs = get_switchable_rate(x);
2756 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2758 if (interpolating_intpel_seen && is_intpel_interp) {
2759 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2760 tmp_rate_sum, tmp_dist_sum);
2761 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
2762 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
2763 cpi->rd_filter_cache[i] + rs_rd);
2764 rd = cpi->rd_filter_cache[i];
2765 if (cm->mcomp_filter_type == SWITCHABLE)
2769 int64_t dist_sum = 0;
2770 if ((cm->mcomp_filter_type == SWITCHABLE &&
2771 (!i || best_needs_copy)) ||
2772 (cm->mcomp_filter_type != SWITCHABLE &&
2773 (cm->mcomp_filter_type == mbmi->interp_filter ||
2774 (!interpolating_intpel_seen && is_intpel_interp)))) {
2775 for (j = 0; j < MAX_MB_PLANE; j++) {
2776 xd->plane[j].dst.buf = orig_dst[j];
2777 xd->plane[j].dst.stride = orig_dst_stride[j];
2780 for (j = 0; j < MAX_MB_PLANE; j++) {
2781 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2782 xd->plane[j].dst.stride = 64;
2785 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2786 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2787 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2788 rate_sum, dist_sum);
2789 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
2790 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
2791 cpi->rd_filter_cache[i] + rs_rd);
2792 rd = cpi->rd_filter_cache[i];
2793 if (cm->mcomp_filter_type == SWITCHABLE)
2795 if (!interpolating_intpel_seen && is_intpel_interp) {
2796 tmp_rate_sum = rate_sum;
2797 tmp_dist_sum = dist_sum;
2800 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2801 if (rd / 2 > ref_best_rd) {
2802 for (i = 0; i < MAX_MB_PLANE; i++) {
2803 xd->plane[i].dst.buf = orig_dst[i];
2804 xd->plane[i].dst.stride = orig_dst_stride[i];
2809 newbest = i == 0 || rd < best_rd;
2813 *best_filter = mbmi->interp_filter;
2814 if (cm->mcomp_filter_type == SWITCHABLE && i &&
2815 !(interpolating_intpel_seen && is_intpel_interp))
2816 best_needs_copy = !best_needs_copy;
2819 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2820 (cm->mcomp_filter_type != SWITCHABLE &&
2821 cm->mcomp_filter_type == mbmi->interp_filter)) {
2824 interpolating_intpel_seen |= is_intpel_interp;
2827 for (i = 0; i < MAX_MB_PLANE; i++) {
2828 xd->plane[i].dst.buf = orig_dst[i];
2829 xd->plane[i].dst.stride = orig_dst_stride[i];
2832 // Set the appropriate filter
2833 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2834 cm->mcomp_filter_type : *best_filter;
2835 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2836 rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
2839 if (best_needs_copy) {
2840 // again temporarily set the buffers to local memory to prevent a memcpy
2841 for (i = 0; i < MAX_MB_PLANE; i++) {
2842 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2843 xd->plane[i].dst.stride = 64;
2847 // Handles the special case when a filter that is not in the
2848 // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level
2849 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2853 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2856 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2857 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2858 // if current pred_error modeled rd is substantially more than the best
2859 // so far, do not bother doing full rd
2860 if (rd / 2 > ref_best_rd) {
2861 for (i = 0; i < MAX_MB_PLANE; i++) {
2862 xd->plane[i].dst.buf = orig_dst[i];
2863 xd->plane[i].dst.stride = orig_dst_stride[i];
2869 if (cpi->common.mcomp_filter_type == SWITCHABLE)
2870 *rate2 += get_switchable_rate(x);
2872 if (!is_comp_pred) {
2873 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2875 else if (x->encode_breakout) {
2876 const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2877 const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize,
2879 unsigned int var, sse;
2880 // Skipping threshold for ac.
2881 unsigned int thresh_ac;
2882 // The encode_breakout input
2883 unsigned int encode_breakout = x->encode_breakout << 4;
2885 // Calculate threshold according to dequant value.
2886 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2888 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
2889 if (thresh_ac > 36000)
2892 // Use encode_breakout input if it is bigger than internal threshold.
2893 if (thresh_ac < encode_breakout)
2894 thresh_ac = encode_breakout;
2896 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2897 xd->plane[0].dst.buf,
2898 xd->plane[0].dst.stride, &sse);
2900 // Adjust threshold according to partition size.
2901 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2902 b_height_log2_lookup[bsize]);
2904 // Y skipping condition checking
2905 if (sse < thresh_ac || sse == 0) {
2906 // Skipping threshold for dc
2907 unsigned int thresh_dc;
2909 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2911 // dc skipping checking
2912 if ((sse - var) < thresh_dc || sse == var) {
2913 unsigned int sse_u, sse_v;
2914 unsigned int var_u, var_v;
2916 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2917 x->plane[1].src.stride,
2918 xd->plane[1].dst.buf,
2919 xd->plane[1].dst.stride, &sse_u);
2921 // U skipping condition checking
2922 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2923 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2924 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2925 x->plane[2].src.stride,
2926 xd->plane[2].dst.buf,
2927 xd->plane[2].dst.stride, &sse_v);
2929 // V skipping condition checking
2930 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2931 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2937 // Scaling factor for SSE from spatial domain to frequency domain
2938 // is 16. Adjust distortion accordingly.
2939 *distortion_uv = (sse_u + sse_v) << 4;
2940 *distortion = (sse << 4) + *distortion_uv;
2943 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2952 int skippable_y, skippable_uv;
2953 int64_t sseuv = INT64_MAX;
2955 // Y cost and distortion
2956 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2957 bsize, txfm_cache, ref_best_rd);
2959 if (*rate_y == INT_MAX) {
2961 *distortion = INT64_MAX;
2962 for (i = 0; i < MAX_MB_PLANE; i++) {
2963 xd->plane[i].dst.buf = orig_dst[i];
2964 xd->plane[i].dst.stride = orig_dst_stride[i];
2970 *distortion += *distortion_y;
2972 super_block_uvrd(cm, x, rate_uv, distortion_uv,
2973 &skippable_uv, &sseuv, bsize);
2977 *distortion += *distortion_uv;
2978 *skippable = skippable_y && skippable_uv;
2981 for (i = 0; i < MAX_MB_PLANE; i++) {
2982 xd->plane[i].dst.buf = orig_dst[i];
2983 xd->plane[i].dst.stride = orig_dst_stride[i];
2986 return this_rd; // if 0, this will be re-calculated by caller
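// Choose the best intra luma mode (whole-block for 8x8 and above,
// otherwise per-4x4) and the best chroma mode, returning the combined
// rate and distortion including the cost of the skip flag.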
2989 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2990 int *returnrate, int64_t *returndist,
2991 BLOCK_SIZE_TYPE bsize,
2992 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2993 VP9_COMMON *const cm = &cpi->common;
2994 MACROBLOCKD *const xd = &x->e_mbd;
2995 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2996 int y_skip = 0, uv_skip;
2997 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3000 xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
3001 if (bsize >= BLOCK_8X8) {
3002 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3003 &dist_y, &y_skip, bsize, tx_cache,
3004 best_rd) >= best_rd) {
3005 *returnrate = INT_MAX;
3008 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3009 &dist_uv, &uv_skip, bsize);
3012 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3013 &dist_y, best_rd) >= best_rd) {
3014 *returnrate = INT_MAX;
3017 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3018 &dist_uv, &uv_skip, BLOCK_8X8);
3021 if (y_skip && uv_skip) {
3022 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3023 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
3024 *returndist = dist_y + (dist_uv >> 2);
3025 vp9_zero(ctx->tx_rd_diff);
3028 *returnrate = rate_y + rate_uv +
3029 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
3030 *returndist = dist_y + (dist_uv >> 2);
3031 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3032 for (i = 0; i < TX_MODES; i++)
3033 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3036 ctx->mic = *xd->mode_info_context;
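// Main mode decision loop. Walks vp9_mode_order[], pruning candidates
// with adaptive rd thresholds, reference-frame and mode masks and
// segment-level restrictions, and tracks the best mode together with the
// per-prediction-type, per-tx-mode and per-filter rd differences needed
// for later signalling decisions.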
3039 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3040 int mi_row, int mi_col,
3042 int64_t *returndistortion,
3043 BLOCK_SIZE_TYPE bsize,
3044 PICK_MODE_CONTEXT *ctx,
3045 int64_t best_rd_so_far) {
3046 VP9_COMMON *cm = &cpi->common;
3047 MACROBLOCKD *xd = &x->e_mbd;
3048 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
3049 const struct segmentation *seg = &cm->seg;
3050 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3051 MB_PREDICTION_MODE this_mode;
3052 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3053 unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
3055 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3056 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3057 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3058 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3060 int idx_list[4] = {0,
3064 int64_t best_rd = best_rd_so_far;
3065 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3066 int64_t best_tx_rd[TX_MODES];
3067 int64_t best_tx_diff[TX_MODES];
3068 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3069 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3070 int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1];
3071 int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
3072 MB_MODE_INFO best_mbmode = { 0 };
3074 int mode_index, best_mode_index = 0;
3075 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3076 vp9_prob comp_mode_p;
3077 int64_t best_intra_rd = INT64_MAX;
3078 int64_t best_inter_rd = INT64_MAX;
3079 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3080 // MB_PREDICTION_MODE best_inter_mode = ZEROMV;
3081 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3082 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
3083 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3084 int64_t dist_uv[TX_SIZES];
3085 int skip_uv[TX_SIZES];
3086 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3087 struct scale_factors scale_factor[4];
3088 unsigned int ref_frame_mask = 0;
3089 unsigned int mode_mask = 0;
3090 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3091 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
3092 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
3093 cpi->common.y_dc_delta_q);
3094 int_mv seg_mvs[4][MAX_REF_FRAMES];
3095 union b_mode_info best_bmodes[4];
3096 PARTITION_INFO best_partition;
3097 int bwsl = b_width_log2(bsize);
3098 int bws = (1 << bwsl) / 4; // mode_info step for subsize
3099 int bhsl = b_height_log2(bsize);
3100 int bhs = (1 << bhsl) / 4; // mode_info step for subsize
3103 x->skip_encode = (cpi->sf.skip_encode_frame &&
3104 xd->q_index < QIDX_SKIP_THRESH);
3106 for (i = 0; i < 4; i++) {
3108 for (j = 0; j < MAX_REF_FRAMES; j++)
3109 seg_mvs[i][j].as_int = INVALID_MV;
3111 // Everywhere the flag is set the error is much higher than its neighbors.
3112 ctx->frames_with_high_error = 0;
3113 ctx->modes_with_high_error = 0;
3115 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3118 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3119 best_pred_rd[i] = INT64_MAX;
3120 for (i = 0; i < TX_MODES; i++)
3121 best_tx_rd[i] = INT64_MAX;
3122 for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
3123 best_filter_rd[i] = INT64_MAX;
3124 for (i = 0; i < TX_SIZES; i++)
3125 rate_uv_intra[i] = INT_MAX;
3127 *returnrate = INT_MAX;
3129 // Create a mask set to 1 for each reference frame used by a smaller
3131 if (cpi->sf.use_avoid_tested_higherror) {
3132 switch (block_size) {
3134 for (i = 0; i < 4; i++) {
3135 for (j = 0; j < 4; j++) {
3136 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
3137 mode_mask |= x->mb_context[i][j].modes_with_high_error;
3140 for (i = 0; i < 4; i++) {
3141 ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
3142 mode_mask |= x->sb32_context[i].modes_with_high_error;
3146 for (i = 0; i < 4; i++) {
3148 x->mb_context[xd->sb_index][i].frames_with_high_error;
3149 mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
3153 // Until we handle all block sizes, set it to present.
3158 ref_frame_mask = ~ref_frame_mask;
3159 mode_mask = ~mode_mask;
3162 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3163 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3164 setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
3165 mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
3166 yv12_mb, scale_factor);
3168 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3169 frame_mv[ZEROMV][ref_frame].as_int = 0;
3172 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3173 int mode_excluded = 0;
3174 int64_t this_rd = INT64_MAX;
3175 int disable_skip = 0;
3176 int compmode_cost = 0;
3177 int rate2 = 0, rate_y = 0, rate_uv = 0;
3178 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3180 int64_t tx_cache[TX_MODES];
3183 int64_t total_sse = INT64_MAX;
3186 for (i = 0; i < TX_MODES; ++i)
3187 tx_cache[i] = INT64_MAX;
3190 this_mode = vp9_mode_order[mode_index].mode;
3191 ref_frame = vp9_mode_order[mode_index].ref_frame;
3192 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
3194 // Skip modes that have been masked off but always consider first mode.
3195 if (mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) &&
3196 (cpi->unused_mode_skip_mask & (1 << mode_index)) )
3199 // Skip if the current reference frame has been masked off
3200 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3201 (cpi->ref_frame_mask & (1 << ref_frame)))
3204 // Test best rd so far against threshold for trying this mode.
3205 if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
3206 cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) ||
3207 cpi->rd_threshes[bsize][mode_index] == INT_MAX)
3210 // Do not allow compound prediction if the segment level reference
3211 // frame feature is in use, as in this case there can only be one reference.
3212 if ((second_ref_frame > INTRA_FRAME) &&
3213 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3216 // Skip some checking based on small partitions' result.
3217 if (x->fast_ms > 1 && !ref_frame)
3219 if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
3222 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) {
3223 if (!(ref_frame_mask & (1 << ref_frame))) {
3226 if (!(mode_mask & (1 << this_mode))) {
3229 if (second_ref_frame != NONE
3230 && !(ref_frame_mask & (1 << second_ref_frame))) {
3235 mbmi->ref_frame[0] = ref_frame;
3236 mbmi->ref_frame[1] = second_ref_frame;
3238 if (!(ref_frame == INTRA_FRAME
3239 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3242 if (!(second_ref_frame == NONE
3243 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3247 comp_pred = second_ref_frame > INTRA_FRAME;
3249 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3250 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3252 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3253 if (ref_frame != best_inter_ref_frame &&
3254 second_ref_frame != best_inter_ref_frame)
3257 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3259 if (ref_frame > 0 &&
3260 (scale_factor[ref_frame].x_scale_fp != VP9_REF_NO_SCALE ||
3261 scale_factor[ref_frame].y_scale_fp != VP9_REF_NO_SCALE) &&
3262 this_mode == SPLITMV)
3265 if (second_ref_frame > 0 &&
3266 (scale_factor[second_ref_frame].x_scale_fp != VP9_REF_NO_SCALE ||
3267 scale_factor[second_ref_frame].y_scale_fp != VP9_REF_NO_SCALE) &&
3268 this_mode == SPLITMV)
3271 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3272 mbmi->mode = this_mode;
3273 mbmi->uv_mode = DC_PRED;
3275 // Evaluate all sub-pel filters irrespective of whether we can use
3276 // them for this frame.
3277 mbmi->interp_filter = cm->mcomp_filter_type;
3278 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3280 if (bsize >= BLOCK_8X8 &&
3281 (this_mode == I4X4_PRED || this_mode == SPLITMV))
3283 if (bsize < BLOCK_8X8 &&
3284 !(this_mode == I4X4_PRED || this_mode == SPLITMV))
3288 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3290 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3292 mode_excluded = mode_excluded
3294 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
3296 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3299 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
3303 // Select prediction reference frames.
3304 for (i = 0; i < MAX_MB_PLANE; i++) {
3305 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3307 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3310 // If the segment reference frame feature is enabled,
3311 // do nothing if the current ref frame is not allowed.
3312 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3313 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3316 // If the segment skip feature is enabled,
3317 // do nothing if the current mode is not allowed.
3318 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3319 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3321 // Disable this drop out case if the ref frame
3322 // segment level feature is enabled for this segment. This is to
3323 // prevent the possibility that we end up unable to pick any mode.
3324 } else if (!vp9_segfeature_active(seg, segment_id,
3325 SEG_LVL_REF_FRAME)) {
3326 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3327 // unless ARNR filtering is enabled in which case we want
3328 // an unfiltered alternative. We allow near/nearest as well
3329 // because they may result in zero-zero MVs but be cheaper.
3330 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3331 if ((this_mode != ZEROMV &&
3332 !(this_mode == NEARMV &&
3333 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3334 !(this_mode == NEARESTMV &&
3335 frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
3336 ref_frame != ALTREF_FRAME) {
3341 // TODO(JBB): This is to make up for the fact that we don't have sad
3342 // functions that work when the block size reads outside the umv. We
3343 // should fix this either by making the motion search work on a
3344 // representative block at the boundary first, and then implementing a
3345 // function that does sads when inside the border.
3346 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
3347 this_mode == NEWMV) {
3351 if (this_mode == I4X4_PRED) {
3355 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3356 (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME))
3360 // I4X4_PRED is only considered for block sizes less than 8x8.
3361 mbmi->txfm_size = TX_4X4;
3362 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3363 &distortion_y, best_rd) >= best_rd)
3366 rate2 += intra_cost_penalty;
3367 distortion2 += distortion_y;
3369 if (rate_uv_intra[TX_4X4] == INT_MAX) {
3370 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
3371 &rate_uv_tokenonly[TX_4X4],
3372 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
3375 rate2 += rate_uv_intra[TX_4X4];
3376 rate_uv = rate_uv_tokenonly[TX_4X4];
3377 distortion2 += dist_uv[TX_4X4];
3378 distortion_uv = dist_uv[TX_4X4];
3379 mbmi->uv_mode = mode_uv[TX_4X4];
3380 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3381 for (i = 0; i < TX_MODES; ++i)
3382 tx_cache[i] = tx_cache[ONLY_4X4];
3383 } else if (ref_frame == INTRA_FRAME) {
3385 // Disable intra modes other than DC_PRED for blocks with low variance
3386 // Threshold for intra skipping based on source variance
3387 // TODO(debargha): Specialize the threshold for super block sizes
3388 static const int skip_intra_var_thresh[BLOCK_SIZES] = {
3389 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
3391 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3392 this_mode != DC_PRED &&
3393 x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
3395 // Only search the oblique modes if the best so far is
3396 // one of the neighboring directional modes
3397 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3398 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3399 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
3402 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3403 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3406 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3407 bsize, tx_cache, best_rd);
3409 if (rate_y == INT_MAX)
3412 uv_tx = MIN(mbmi->txfm_size, max_uv_txsize_lookup[bsize]);
3413 if (rate_uv_intra[uv_tx] == INT_MAX) {
3414 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
3415 &rate_uv_tokenonly[uv_tx],
3416 &dist_uv[uv_tx], &skip_uv[uv_tx],
3420 rate_uv = rate_uv_tokenonly[uv_tx];
3421 distortion_uv = dist_uv[uv_tx];
3422 skippable = skippable && skip_uv[uv_tx];
3423 mbmi->uv_mode = mode_uv[uv_tx];
3425 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3426 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
3427 rate2 += intra_cost_penalty;
3428 distortion2 = distortion_y + distortion_uv;
3429 } else if (this_mode == SPLITMV) {
3430 const int is_comp_pred = second_ref_frame > 0;
3433 int64_t this_rd_thresh;
3434 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3435 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3436 int64_t tmp_best_distortion = INT64_MAX, tmp_best_sse, uv_sse;
3437 int tmp_best_skippable = 0;
3438 int switchable_filter_index;
3439 int_mv *second_ref = is_comp_pred ?
3440 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3441 union b_mode_info tmp_best_bmodes[16];
3442 MB_MODE_INFO tmp_best_mbmode;
3443 PARTITION_INFO tmp_best_partition;
3444 BEST_SEG_INFO bsi[VP9_SWITCHABLE_FILTERS];
3445 int pred_exists = 0;
3448 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3449 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3451 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3452 if (ref_frame != best_inter_ref_frame &&
3453 second_ref_frame != best_inter_ref_frame)
3457 this_rd_thresh = (ref_frame == LAST_FRAME) ?
3458 cpi->rd_threshes[bsize][THR_NEWMV] :
3459 cpi->rd_threshes[bsize][THR_NEWA];
3460 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3461 cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
3462 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
3464 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
3465 for (switchable_filter_index = 0;
3466 switchable_filter_index < VP9_SWITCHABLE_FILTERS;
3467 ++switchable_filter_index) {
3470 mbmi->interp_filter = switchable_filter_index;
3471 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3473 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
3474 &mbmi->ref_mvs[ref_frame][0],
3477 &rate, &rate_y, &distortion,
3478 &skippable, &total_sse,
3479 (int)this_rd_thresh, seg_mvs,
3480 bsi, switchable_filter_index,
3483 if (tmp_rd == INT64_MAX)
3485 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
3486 rs = get_switchable_rate(x);
3487 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3488 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
3489 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
3490 if (cm->mcomp_filter_type == SWITCHABLE)
3493 newbest = (tmp_rd < tmp_best_rd);
3495 tmp_best_filter = mbmi->interp_filter;
3496 tmp_best_rd = tmp_rd;
3498 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
3499 (mbmi->interp_filter == cm->mcomp_filter_type &&
3500 cm->mcomp_filter_type != SWITCHABLE)) {
3501 tmp_best_rdu = tmp_rd;
3502 tmp_best_rate = rate;
3503 tmp_best_ratey = rate_y;
3504 tmp_best_distortion = distortion;
3505 tmp_best_sse = total_sse;
3506 tmp_best_skippable = skippable;
3507 tmp_best_mbmode = *mbmi;
3508 tmp_best_partition = *x->partition_info;
3509 for (i = 0; i < 4; i++)
3510 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
3512 if (switchable_filter_index == 0 &&
3513 cpi->sf.use_rd_breakout &&
3514 best_rd < INT64_MAX) {
3515 if (tmp_best_rdu / 2 > best_rd) {
3516 // skip searching the other filters if the first is
3517 // already substantially larger than the best so far
3518 tmp_best_filter = mbmi->interp_filter;
3519 tmp_best_rdu = INT64_MAX;
3524 } // switchable_filter_index loop
3526 if (tmp_best_rdu == INT64_MAX)
3529 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
3530 tmp_best_filter : cm->mcomp_filter_type);
3531 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3533 // Handles the special case when a filter that is not in the
3534 // switchable list (bilinear, 6-tap) is indicated at the frame level
3535 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
3536 &mbmi->ref_mvs[ref_frame][0],
3539 &rate, &rate_y, &distortion,
3540 &skippable, &total_sse,
3541 (int)this_rd_thresh, seg_mvs,
3544 if (tmp_rd == INT64_MAX)
3547 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
3548 int rs = get_switchable_rate(x);
3549 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
3551 tmp_rd = tmp_best_rdu;
3552 total_sse = tmp_best_sse;
3553 rate = tmp_best_rate;
3554 rate_y = tmp_best_ratey;
3555 distortion = tmp_best_distortion;
3556 skippable = tmp_best_skippable;
3557 *mbmi = tmp_best_mbmode;
3558 *x->partition_info = tmp_best_partition;
3559 for (i = 0; i < 4; i++)
3560 xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
3564 distortion2 += distortion;
3566 if (cpi->common.mcomp_filter_type == SWITCHABLE)
3567 rate2 += get_switchable_rate(x);
3569 if (!mode_excluded) {
3571 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
3573 mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
3575 compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
3577 if (RDCOST(x->rdmult, x->rddiv, rate2, distortion2) <
3579 // If even the 'Y' rd value of split is higher than best so far
3580 // then don't bother looking at UV
3581 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
3584 super_block_uvrd(cm, x, &rate_uv, &distortion_uv, &uv_skippable,
3585 &uv_sse, BLOCK_8X8);
3587 distortion2 += distortion_uv;
3588 skippable = skippable && uv_skippable;
3589 total_sse += uv_sse;
3591 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3592 for (i = 0; i < TX_MODES; ++i)
3593 tx_cache[i] = tx_cache[ONLY_4X4];
3596 compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
3597 this_rd = handle_inter_mode(cpi, x, bsize,
3599 &rate2, &distortion2, &skippable,
3600 &rate_y, &distortion_y,
3601 &rate_uv, &distortion_uv,
3602 &mode_excluded, &disable_skip,
3603 &tmp_best_filter, frame_mv,
3605 single_newmv, &total_sse, best_rd);
3606 if (this_rd == INT64_MAX)
3610 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3611 rate2 += compmode_cost;
3614 // Estimate the reference frame signaling cost and add it
3615 // to the rolling cost variable.
3616 if (second_ref_frame > INTRA_FRAME) {
3617 rate2 += ref_costs_comp[ref_frame];
3619 rate2 += ref_costs_single[ref_frame];
3622 if (!disable_skip) {
3623 // Test for the condition where skip block will be activated
3624 // because there are no non-zero coefficients, and make any
3625 // necessary adjustment for rate. Ignore if skip is coded at
3626 // segment level as the cost won't have been added in.
3627 // Is MB level skip allowed (i.e. not coded at segment level)?
3628 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3631 if (skippable && bsize >= BLOCK_8X8) {
3632 // Back out the coefficient coding costs
3633 rate2 -= (rate_y + rate_uv);
3634 // for best yrd calculation
3637 if (mb_skip_allowed) {
3640 // Cost the skip mb case
3641 vp9_prob skip_prob =
3642 vp9_get_pred_prob_mbskip(cm, xd);
3645 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3646 rate2 += prob_skip_cost;
3649 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3650 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3651 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3652 // Add in the cost of the no skip flag.
3653 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3655 rate2 += prob_skip_cost;
3657 // FIXME(rbultje) make this work for splitmv also
3658 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3660 rate2 += prob_skip_cost;
3661 distortion2 = total_sse;
3662 assert(total_sse >= 0);
3663 rate2 -= (rate_y + rate_uv);
3668 } else if (mb_skip_allowed) {
3669 // Add in the cost of the no skip flag.
3670 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3672 rate2 += prob_skip_cost;
3675 // Calculate the final RD estimate for this mode.
3676 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
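    // At this point rate2/distortion2 cover the complete cost of the mode
    // (coefficients or skip flag, reference signaling, compound flag), so
    // this_rd is the Lagrangian cost used by all comparisons below.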
    // Keep record of best intra rd.
    if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME &&
        is_intra_mode(xd->mode_info_context->mbmi.mode) &&
        this_rd < best_intra_rd) {
      best_intra_rd = this_rd;
      best_intra_mode = xd->mode_info_context->mbmi.mode;
    }
    // Keep record of best inter rd with a single reference.
    if (xd->mode_info_context->mbmi.ref_frame[0] > INTRA_FRAME &&
        xd->mode_info_context->mbmi.ref_frame[1] == NONE &&
        !mode_excluded &&
        this_rd < best_inter_rd) {
      best_inter_rd = this_rd;
      best_inter_ref_frame = ref_frame;
      // best_inter_mode = xd->mode_info_context->mbmi.mode;
    }
    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }
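
    // An intra winner is available regardless of which prediction type or
    // interpolation filter the frame ends up using, so it seeds every entry
    // of the prediction-type and filter records above.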
    if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
      // Store the respective mode distortions for later use.
      if (mode_distortions[this_mode] == -1
          || distortion2 < mode_distortions[this_mode]) {
        mode_distortions[this_mode] = distortion2;
      }
      if (frame_distortions[ref_frame] == -1
          || distortion2 < frame_distortions[ref_frame]) {
        frame_distortions[ref_frame] = distortion2;
      }
    }
    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        // Note index of best mode so far.
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_yrd = best_rd -
                   RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        best_partition = *x->partition_info;

        if (this_mode == I4X4_PRED || this_mode == SPLITMV)
          for (i = 0; i < 4; i++)
            best_bmodes[i] = xd->mode_info_context->bmi[i];

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history.
        if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) {
          const int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index.
          int scale = 4;
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
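
      // The early-termination test above ends the mode loop once the best
      // mode's distortion is already small relative to a single quantizer
      // step; very flat sources (variance < 16) use a slightly easier test.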
      // Testing this mode gave rise to an improvement in best error score.
      // Lower the threshold a bit for next time.
      cpi->rd_thresh_mult[mode_index] =
          (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
              cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
      cpi->rd_threshes[mode_index] =
          (cpi->rd_baseline_thresh[mode_index] >> 7)
              * cpi->rd_thresh_mult[mode_index];
    } else {
      // If the mode did not help improve the best error case then
      // raise the threshold for testing that mode next time around.
      cpi->rd_thresh_mult[mode_index] += 4;

      if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
        cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

      cpi->rd_threshes[mode_index] =
          (cpi->rd_baseline_thresh[mode_index] >> 7)
              * cpi->rd_thresh_mult[mode_index];
    }
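
    // Net effect: a mode that keeps winning sees its threshold multiplier
    // drop by 2 per win (floored at MIN_THRESHMULT), while a losing mode
    // climbs by 4 per miss (capped at MAX_THRESHMULT), so rarely useful
    // modes face an ever higher rd bar before being evaluated in full.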
    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (second_ref_frame <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
      } else if (second_ref_frame > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
    }
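
    // Costing rate2 both with and without the compound-mode bit lets the
    // frame level compare afterwards how this block would have fared under
    // single-only, comp-only and hybrid prediction without redoing the
    // search.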
    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->mcomp_filter_type != BILINEAR) {
      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
                        VP9_SWITCHABLE_FILTERS : cm->mcomp_filter_type];
      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
        int64_t adj_rd;
        // In cases of poor prediction, filter_cache[] can contain really big
        // values, which actually are bigger than this_rd itself. This can
        // cause negative best_filter_rd[] values, which is obviously silly.
        // Therefore, if filter_cache < ref, we do an adjusted calculation.
        if (cpi->rd_filter_cache[i] >= ref)
          adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
        else  // FIXME(rbultje) do this for comppred also
          adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
      }
    }
    /* keep record of best txfm size */
    if (bsize < BLOCK_32X32) {
      if (bsize < BLOCK_16X16) {
        if (this_mode == SPLITMV || this_mode == I4X4_PRED)
          tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
        tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
      }
      tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
    }
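    // Block sizes below 16x16 (or 32x32) can never use the larger
    // transforms, so their rd totals are copied upward to keep every
    // tx_cache[] entry comparable when the per-tx-mode records are updated
    // below.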
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES; i++) {
        int64_t adj_rd = INT64_MAX;
        if (this_mode != I4X4_PRED) {
          adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
        } else {
          adj_rd = this_rd;
        }

        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;
      }
    }

    if (early_term)
      break;
    if (x->skip && !comp_pred)
      break;
  }

  if (best_rd >= best_rd_so_far)
    return INT64_MAX;
  // If we used an estimate for the uv intra rd in the loop above...
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Do the full intra UV rd mode selection if the best mode choice above
    // was intra.
    if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
    }
  }
  // If indicated, mark the index of the chosen mode so that it is also
  // inspected at other block sizes.
  if (bsize <= cpi->sf.unused_mode_skip_lvl) {
    cpi->unused_mode_skip_mask = cpi->unused_mode_skip_mask &
                                 (~((int64_t)1 << best_mode_index));
  }
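
  // Clearing the bit takes the chosen mode off the skip mask, so it stays on
  // the candidate list when the same search runs at other block sizes.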
  // If we are using reference masking and the set mask flag is set then
  // create the reference frame mask.
  if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
    cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);
  // Flag all modes that have a distortion that's more than 2x the best we
  // found at this level.
  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
      continue;

    if (mode_distortions[mode_index] > 2 * *returndistortion) {
      ctx->modes_with_high_error |= (1 << mode_index);
    }
  }

  // Flag all ref frames that have a distortion that's more than 2x the best
  // we found at this level.
  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
      ctx->frames_with_high_error |= (1 << ref_frame);
    }
  }
  if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
    *returnrate = INT_MAX;
    *returndistortion = INT_MAX;
    return best_rd;
  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.ref_frame[0] == INTRA_FRAME));
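
  // With a fixed (non-switchable) filter the winning inter mode must carry
  // exactly that filter; intra winners have no interpolation filter to check.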
  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the
  // mode/size combination that wins out.
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
      if (mode_index == best_mode_index) {
        cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT;
      } else {
        cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC;
        if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
            (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) {
          cpi->rd_thresh_freq_fact[bsize][mode_index] =
              cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT;
        }
      }
    }
  }
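
  // rd_thresh_freq_fact[] acts as a per-size recency measure: the winner is
  // reset to the base factor while every other mode drifts up to a cap, so
  // modes that have not won recently are tried less eagerly next time.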
  // TODO(rbultje) integrate with RD thresholding.

  // Reduce the activation RD thresholds for the best choice mode.
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);

    cpi->rd_thresh_mult[best_mode_index] =
        (cpi->rd_thresh_mult[best_mode_index] >=
            (MIN_THRESHMULT + best_adjustment)) ?
                cpi->rd_thresh_mult[best_mode_index] - best_adjustment :
                MIN_THRESHMULT;
    cpi->rd_threshes[best_mode_index] =
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
        cpi->rd_thresh_mult[best_mode_index];
  }
  *mbmi = best_mbmode;
  x->skip |= best_skip2;
  if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_8X8) {
    for (i = 0; i < 4; i++)
      xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
  }

  if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_8X8) {
    for (i = 0; i < 4; i++)
      xd->mode_info_context->bmi[i].as_mv[0].as_int =
          best_bmodes[i].as_mv[0].as_int;

    if (mbmi->ref_frame[1] > 0)
      for (i = 0; i < 4; i++)
        xd->mode_info_context->bmi[i].as_mv[1].as_int =
            best_bmodes[i].as_mv[1].as_int;

    *x->partition_info = best_partition;

    mbmi->mv[0].as_int = xd->mode_info_context->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = xd->mode_info_context->bmi[3].as_mv[1].as_int;
  }
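
  // For sub-8x8 inter blocks the block-level mv is taken from the
  // bottom-right (index 3) 4x4 sub-block; this is the value neighboring
  // blocks read when using this block for mv context.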
  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->mcomp_filter_type == SWITCHABLE)
      assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0);
  } else {
    vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
  }

  if (!x->skip) {
    for (i = 0; i < TX_MODES; i++) {
      if (best_tx_rd[i] == INT64_MAX)
        best_tx_diff[i] = 0;
      else
        best_tx_diff[i] = best_rd - best_tx_rd[i];
    }
  } else {
    vpx_memset(best_tx_diff, 0, sizeof(best_tx_diff));
  }
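
  // Each *_diff entry records how much worse an alternative prediction type,
  // interpolation filter or tx mode was than the winner; they are stored in
  // the coding context below so later decisions can consult them without
  // re-running the search.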
  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                    scale_factor);
  store_coding_context(x, ctx, best_mode_index,
                       &best_partition,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_tx_diff, best_filter_diff);

  return best_rd;
}