2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
17 #include "vp9/common/vp9_pragmas.h"
18 #include "vp9/encoder/vp9_tokenize.h"
19 #include "vp9/encoder/vp9_treewriter.h"
20 #include "vp9/encoder/vp9_onyx_int.h"
21 #include "vp9/encoder/vp9_modecosts.h"
22 #include "vp9/encoder/vp9_encodeintra.h"
23 #include "vp9/common/vp9_entropymode.h"
24 #include "vp9/common/vp9_reconinter.h"
25 #include "vp9/common/vp9_reconintra.h"
26 #include "vp9/common/vp9_findnearmv.h"
27 #include "vp9/common/vp9_quant_common.h"
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_quantize.h"
30 #include "vp9/encoder/vp9_variance.h"
31 #include "vp9/encoder/vp9_mcomp.h"
32 #include "vp9/encoder/vp9_rdopt.h"
33 #include "vp9/encoder/vp9_ratectrl.h"
34 #include "vpx_mem/vpx_mem.h"
35 #include "vp9/common/vp9_systemdependent.h"
36 #include "vp9/encoder/vp9_encodemv.h"
37 #include "vp9/common/vp9_seg_common.h"
38 #include "vp9/common/vp9_pred_common.h"
39 #include "vp9/common/vp9_entropy.h"
41 #include "vp9/common/vp9_mvref_common.h"
42 #include "vp9/common/vp9_common.h"
44 #define INVALID_MV 0x80008000
46 /* Factor to weigh the rate for switchable interp filters */
47 #define SWITCHABLE_INTERP_RATE_FACTOR 1
49 DECLARE_ALIGNED(16, extern const uint8_t,
50 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
52 #define I4X4_PRED 0x8000
53 #define SPLITMV 0x10000
55 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56 {NEARESTMV, LAST_FRAME, NONE},
57 {NEARESTMV, ALTREF_FRAME, NONE},
58 {NEARESTMV, GOLDEN_FRAME, NONE},
59 {NEWMV, LAST_FRAME, NONE},
60 {NEARESTMV, LAST_FRAME, ALTREF_FRAME},
61 {NEARMV, LAST_FRAME, NONE},
62 {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
64 {DC_PRED, INTRA_FRAME, NONE},
66 {NEWMV, GOLDEN_FRAME, NONE},
67 {NEWMV, ALTREF_FRAME, NONE},
68 {NEARMV, ALTREF_FRAME, NONE},
70 {TM_PRED, INTRA_FRAME, NONE},
72 {NEARMV, LAST_FRAME, ALTREF_FRAME},
73 {NEWMV, LAST_FRAME, ALTREF_FRAME},
74 {NEARMV, GOLDEN_FRAME, NONE},
75 {NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
76 {NEWMV, GOLDEN_FRAME, ALTREF_FRAME},
78 {SPLITMV, LAST_FRAME, NONE},
79 {SPLITMV, GOLDEN_FRAME, NONE},
80 {SPLITMV, ALTREF_FRAME, NONE},
81 {SPLITMV, LAST_FRAME, ALTREF_FRAME},
82 {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
84 {ZEROMV, LAST_FRAME, NONE},
85 {ZEROMV, GOLDEN_FRAME, NONE},
86 {ZEROMV, ALTREF_FRAME, NONE},
87 {ZEROMV, LAST_FRAME, ALTREF_FRAME},
88 {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
90 {I4X4_PRED, INTRA_FRAME, NONE},
91 {H_PRED, INTRA_FRAME, NONE},
92 {V_PRED, INTRA_FRAME, NONE},
93 {D135_PRED, INTRA_FRAME, NONE},
94 {D27_PRED, INTRA_FRAME, NONE},
95 {D153_PRED, INTRA_FRAME, NONE},
96 {D63_PRED, INTRA_FRAME, NONE},
97 {D117_PRED, INTRA_FRAME, NONE},
98 {D45_PRED, INTRA_FRAME, NONE},
101 // The baseline rd thresholds for breaking out of the rd loop for
102 // certain modes are assumed to be based on 8x8 blocks.
103 // This table is used to correct for block size.
104 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
105 static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
106 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
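// Illustrative reading of the table above (a sketch, not used by the code):
// a factor f corresponds to a multiplier of f / 4 relative to the 8x8
// baseline, so f = 2 gives x0.5, f = 8 gives x2 and f = 32 gives x8,
// matching the divide-by-4 applied where the thresholds are computed below.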
108 #define BASE_RD_THRESH_FREQ_FACT 16
109 #define MAX_RD_THRESH_FREQ_FACT 32
110 #define MAX_RD_THRESH_FREQ_INC 1
112 static void fill_token_costs(vp9_coeff_cost *c,
113 vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
116 for (t = TX_4X4; t <= TX_32X32; t++)
117 for (i = 0; i < BLOCK_TYPES; i++)
118 for (j = 0; j < REF_TYPES; j++)
119 for (k = 0; k < COEF_BANDS; k++)
120 for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
121 vp9_prob probs[ENTROPY_NODES];
122 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
123 vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
125 vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
127 assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
128 c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
132 static const int rd_iifactor[32] = {
133 4, 4, 3, 2, 1, 0, 0, 0,
134 0, 0, 0, 0, 0, 0, 0, 0,
135 0, 0, 0, 0, 0, 0, 0, 0,
136 0, 0, 0, 0, 0, 0, 0, 0,
139 // 3* dc_qlookup[Q]*dc_qlookup[Q];
141 /* values are now correlated to quantizer */
142 static int sad_per_bit16lut[QINDEX_RANGE];
143 static int sad_per_bit4lut[QINDEX_RANGE];
145 void vp9_init_me_luts() {
148 // Initialize the sad lut tables using a formulaic calculation for now
149 // This is to make it easier to resolve the impact of experimental changes
150 // to the quantizer tables.
151 for (i = 0; i < QINDEX_RANGE; i++) {
152 sad_per_bit16lut[i] =
153 (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
154 sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
158 static int compute_rd_mult(int qindex) {
159 const int q = vp9_dc_quant(qindex, 0);
160 return (11 * q * q) >> 2;
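// Worked example (illustrative only, q values are hypothetical): a DC
// quantizer value of q = 8 yields (11 * 8 * 8) >> 2 = 176, while q = 32
// yields (11 * 32 * 32) >> 2 = 2816, i.e. the multiplier grows roughly with
// the square of the quantizer step.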
163 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
164 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
165 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
169 void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
172 vp9_clear_system_state(); // __asm emms;
174 // Further tests required to see if optimum is different
175 // for key frames, golden frames and arf frames.
176 // if (cpi->common.refresh_golden_frame ||
177 // cpi->common.refresh_alt_ref_frame)
178 qindex = clamp(qindex, 0, MAXQ);
180 cpi->RDMULT = compute_rd_mult(qindex);
181 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
182 if (cpi->twopass.next_iiratio > 31)
183 cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
186 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
188 cpi->mb.errorperbit = cpi->RDMULT >> 6;
189 cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
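// Note: the increment above simply floors errorperbit at 1 when
// cpi->RDMULT >> 6 evaluates to 0.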
191 vp9_set_speed_features(cpi);
193 q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
198 if (cpi->RDMULT > 1000) {
202 for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
203 for (i = 0; i < MAX_MODES; ++i) {
204 // Thresholds here seem unnecessarily harsh but are fine given the actual
205 // range of values used for cpi->sf.thresh_mult[]
206 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
208 // *4 relates to the scaling of rd_thresh_block_size_factor[]
209 if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
210 cpi->rd_threshes[bsize][i] =
211 cpi->sf.thresh_mult[i] * q *
212 rd_thresh_block_size_factor[bsize] / (4 * 100);
214 cpi->rd_threshes[bsize][i] = INT_MAX;
216 cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
218 if (cpi->sf.adaptive_rd_thresh)
219 cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
221 cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
227 for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
228 for (i = 0; i < MAX_MODES; i++) {
229 // Thresholds here seem unnecessarily harsh but are fine given the actual
230 // range of values used for cpi->sf.thresh_mult[]
231 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
233 if (cpi->sf.thresh_mult[i] < thresh_max) {
234 cpi->rd_threshes[bsize][i] =
235 cpi->sf.thresh_mult[i] * q *
236 rd_thresh_block_size_factor[bsize] / 4;
238 cpi->rd_threshes[bsize][i] = INT_MAX;
240 cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
242 if (cpi->sf.adaptive_rd_thresh)
243 cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
245 cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
250 fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);
252 for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
253 vp9_cost_tokens(cpi->mb.partition_cost[i],
254 cpi->common.fc.partition_prob[cpi->common.frame_type][i],
257 /*rough estimate for costing*/
258 vp9_init_mode_costs(cpi);
260 if (cpi->common.frame_type != KEY_FRAME) {
261 vp9_build_nmv_cost_table(
262 cpi->mb.nmvjointcost,
263 cpi->mb.e_mbd.allow_high_precision_mv ?
264 cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
265 &cpi->common.fc.nmvc,
266 cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
268 for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
269 MB_PREDICTION_MODE m;
271 for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
272 cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
273 cost_token(vp9_inter_mode_tree,
274 cpi->common.fc.inter_mode_probs[i],
275 vp9_inter_mode_encodings - NEARESTMV + m);
280 static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
281 return bsize_from_dim_lookup[bwl][bhl];
284 static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
285 struct macroblockd_plane *pd) {
286 return get_block_size(plane_block_width_log2by4(bsize, pd),
287 plane_block_height_log2by4(bsize, pd));
290 static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
291 const double *tab1, const double *tab2,
292 double *v1, double *v2) {
293 double y = x * inv_step;
296 *v1 = tab1[ntab - 1];
297 *v2 = tab2[ntab - 1];
300 *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
301 *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
305 static void model_rd_norm(double x, double *R, double *D) {
306 static const int inv_tab_step = 8;
307 static const int tab_size = 120;
308 // NOTE: The tables below must be of the same size
311 // This table models the rate for a Laplacian source
312 // with given variance when quantized with a uniform quantizer
313 // with given stepsize. The closed form expression is:
314 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
315 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
316 // and H(x) is the binary entropy function.
317 static const double rate_tab[] = {
318 64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
319 2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
320 1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
321 0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
322 0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
323 0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
324 0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
325 0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
326 0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
327 0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
328 0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
329 0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
330 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
331 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
332 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
334 // Normalized distortion
335 // This table models the normalized distortion for a Laplacian source
336 // with given variance when quantized with a uniform quantizer
337 // with given stepsize. The closed form expression is:
338 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
339 // where x = qpstep / sqrt(variance)
340 // Note the actual distortion is Dn * variance.
341 static const double dist_tab[] = {
342 0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
343 0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
344 0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
345 0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
346 0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
347 0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
348 0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
349 0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
350 0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
351 0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
352 0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
353 0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
354 0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
355 0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
356 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
359 assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
360 assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
361 assert(sizeof(rate_tab) == sizeof(dist_tab));
364 linear_interpolate2(x, tab_size, inv_tab_step,
365 rate_tab, dist_tab, R, D);
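// A minimal standalone sketch (kept in a comment; assumes <math.h> and uses
// hypothetical helper names, it is not part of the encoder) of the closed
// forms quoted above, handy for cross-checking rate_tab[] / dist_tab[]
// entries, e.g. laplacian_rate_norm(0.125) evaluates to ~4.944 = rate_tab[1]:
//
//   static double binary_entropy(double p) {
//     return (p <= 0.0 || p >= 1.0) ? 0.0
//                                   : -p * log2(p) - (1.0 - p) * log2(1.0 - p);
//   }
//   // Rn(x): normalized rate; diverges as x -> 0, hence the 64.00 sentinel.
//   static double laplacian_rate_norm(double x) {
//     const double r = exp(-sqrt(2.0) * x);
//     return binary_entropy(sqrt(r)) +
//            sqrt(r) * (1.0 + binary_entropy(r) / (1.0 - r));
//   }
//   // Dn(x): normalized distortion; actual distortion is Dn * variance.
//   static double laplacian_dist_norm(double x) {
//     return 1.0 - (x / sqrt(2.0)) / sinh(x / sqrt(2.0));
//   }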
368 static void model_rd_from_var_lapndz(int var, int n, int qstep,
369 int *rate, int64_t *dist) {
370 // This function models the rate and distortion for a Laplacian
371 // source with given variance when quantized with a uniform quantizer
372 // with given stepsize. The closed form expressions are in:
373 // Hang and Chen, "Source Model for transform video coder and its
374 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
375 // Sys. for Video Tech., April 1997.
376 vp9_clear_system_state();
377 if (var == 0 || n == 0) {
382 double s2 = (double) var / n;
383 double x = qstep / sqrt(s2);
384 model_rd_norm(x, &R, &D);
385 *rate = ((n << 8) * R + 0.5);
386 *dist = (var * D + 0.5);
388 vp9_clear_system_state();
391 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
392 MACROBLOCK *x, MACROBLOCKD *xd,
393 int *out_rate_sum, int64_t *out_dist_sum) {
394 // Note our transform coeffs are 8 times those of an orthogonal transform,
395 // so the quantizer step is also 8 times larger. To get the effective
396 // quantizer we divide by 8 before calling the modeling function.
397 int i, rate_sum = 0, dist_sum = 0;
399 for (i = 0; i < MAX_MB_PLANE; ++i) {
400 struct macroblock_plane *const p = &x->plane[i];
401 struct macroblockd_plane *const pd = &xd->plane[i];
403 // TODO(dkovalev) the same code in get_plane_block_size
404 const int bwl = plane_block_width_log2by4(bsize, pd);
405 const int bhl = plane_block_height_log2by4(bsize, pd);
406 const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
410 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
411 pd->dst.buf, pd->dst.stride, &sse);
412 // sse works better than var, since there is no dc prediction used
413 model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
414 pd->dequant[1] >> 3, &rate, &dist);
420 *out_rate_sum = rate_sum;
421 *out_dist_sum = dist_sum << 4;
424 static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
425 MACROBLOCK *x, MACROBLOCKD *xd,
426 int *out_rate_sum, int64_t *out_dist_sum) {
427 // Note our transform coeffs are 8 times those of an orthogonal transform,
428 // so the quantizer step is also 8 times larger. To get the effective
429 // quantizer we divide by 8 before calling the modeling function.
430 struct macroblock_plane *const p = &x->plane[0];
431 struct macroblockd_plane *const pd = &xd->plane[0];
433 // TODO(dkovalev) the same code in get_plane_block_size
434 const int bwl = plane_block_width_log2by4(bsize, pd);
435 const int bhl = plane_block_height_log2by4(bsize, pd);
436 const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
440 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
441 pd->dst.buf, pd->dst.stride, &sse);
442 // sse works better than var, since there is no dc prediction used
443 model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
444 pd->dequant[1] >> 3, &rate, &dist);
446 *out_rate_sum = rate;
447 *out_dist_sum = dist << 4;
450 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
452 MACROBLOCK *x, MACROBLOCKD *xd,
453 int *out_rate_sum, int64_t *out_dist_sum,
456 BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4;
457 struct macroblock_plane *const p = &x->plane[0];
458 struct macroblockd_plane *const pd = &xd->plane[0];
459 const int width = plane_block_width(bsize, pd);
460 const int height = plane_block_height(bsize, pd);
462 int64_t dist_sum = 0;
464 if (tx_size == TX_4X4) {
467 } else if (tx_size == TX_8X8) {
470 } else if (tx_size == TX_16X16) {
473 } else if (tx_size == TX_32X32) {
480 for (j = 0; j < height; j += t) {
481 for (k = 0; k < width; k += t) {
485 (void) cpi->fn_ptr[bs].vf(p->src.buf + j * p->src.stride + k,
487 pd->dst.buf + j * pd->dst.stride + k,
488 pd->dst.stride, &sse);
489 // sse works better than var, since there is no dc prediction used
490 model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
494 *out_skip &= (rate < 1024);
497 *out_rate_sum = rate_sum;
498 *out_dist_sum = (dist_sum << 4);
501 int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
502 intptr_t block_size, int64_t *ssz) {
504 int64_t error = 0, sqcoeff = 0;
506 for (i = 0; i < block_size; i++) {
507 int this_diff = coeff[i] - dqcoeff[i];
508 error += (unsigned)this_diff * this_diff;
509 sqcoeff += (unsigned) coeff[i] * coeff[i];
516 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
517 * decide whether to include cost of a trailing EOB node or not (i.e. we
518 * can skip this if the last coefficient in this transform block, e.g. the
519 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
521 static const int16_t band_counts[TX_SIZES][8] = {
522 { 1, 2, 3, 4, 3, 16 - 13, 0 },
523 { 1, 2, 3, 4, 11, 64 - 21, 0 },
524 { 1, 2, 3, 4, 11, 256 - 21, 0 },
525 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
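// For reference, each row above sums to the number of coefficients in the
// corresponding transform block: 1+2+3+4+3+3 = 16, 1+2+3+4+11+43 = 64,
// 1+2+3+4+11+235 = 256 and 1+2+3+4+11+1003 = 1024; the trailing 0 is the
// terminator described above and contributes nothing.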
528 static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
529 int plane, int block, PLANE_TYPE type,
530 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
532 const int16_t *scan, const int16_t *nb) {
533 MACROBLOCKD *const xd = &mb->e_mbd;
534 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
536 const int16_t *band_count = &band_counts[tx_size][1];
537 const int eob = xd->plane[plane].eobs[block];
538 const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
539 const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
540 unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS]
541 [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
542 ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
543 uint8_t token_cache[1024];
545 // Check for consistency of tx_size with mode info
546 assert((!type && !plane) || (type && plane));
547 if (type == PLANE_TYPE_Y_WITH_DC) {
548 assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
550 assert(tx_size == get_uv_tx_size(mbmi));
553 pt = combine_entropy_contexts(above_ec, left_ec);
557 cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
560 int v, prev_t, band_left = *band_count++;
564 prev_t = vp9_dct_value_tokens_ptr[v].token;
565 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
566 token_cache[0] = vp9_pt_energy_class[prev_t];
570 for (c = 1; c < eob; c++) {
571 const int rc = scan[c];
575 t = vp9_dct_value_tokens_ptr[v].token;
576 pt = get_coef_context(nb, token_cache, c);
577 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
578 token_cache[rc] = vp9_pt_energy_class[t];
581 band_left = *band_count++;
588 pt = get_coef_context(nb, token_cache, c);
589 cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
593 // is eob first coefficient;
599 struct rdcost_block_args {
602 ENTROPY_CONTEXT t_above[16];
603 ENTROPY_CONTEXT t_left[16];
612 const int16_t *scan, *nb;
615 static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
616 int ss_txfrm_size, void *arg) {
617 struct rdcost_block_args* args = arg;
618 MACROBLOCK* const x = args->x;
619 MACROBLOCKD* const xd = &x->e_mbd;
620 struct macroblock_plane *const p = &x->plane[0];
621 struct macroblockd_plane *const pd = &xd->plane[0];
623 int shift = args->tx_size == TX_32X32 ? 0 : 2;
624 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block, 16);
625 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16);
626 args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
628 args->sse += this_sse >> shift;
630 if (x->skip_encode &&
631 xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
632 // TODO(jingning): tune the model to better capture the distortion.
633 int64_t p = (pd->dequant[1] * pd->dequant[1] *
634 (1 << ss_txfrm_size)) >> shift;
640 static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
641 int ss_txfrm_size, void *arg) {
642 struct rdcost_block_args* args = arg;
644 MACROBLOCKD * const xd = &args->x->e_mbd;
646 txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx,
649 args->rate += cost_coeffs(args->cm, args->x, plane, block,
650 xd->plane[plane].plane_type, args->t_above + x_idx,
651 args->t_left + y_idx, args->tx_size,
652 args->scan, args->nb);
655 // FIXME(jingning): need to make the rd test of chroma components consistent
656 // with that of luma component. this function should be deprecated afterwards.
657 static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
658 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
659 MACROBLOCKD * const xd = &x->e_mbd;
660 const int bwl = plane_block_width_log2by4(bsize, &xd->plane[plane]);
661 const int bhl = plane_block_height_log2by4(bsize, &xd->plane[plane]);
662 const int bw = 1 << bwl, bh = 1 << bhl;
664 struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
665 0, 0, 0, INT64_MAX, 0 };
669 vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
670 sizeof(ENTROPY_CONTEXT) * bw);
671 vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
672 sizeof(ENTROPY_CONTEXT) * bh);
673 args.scan = vp9_default_scan_4x4;
674 args.nb = vp9_default_scan_4x4_neighbors;
677 for (i = 0; i < bw; i += 2)
678 args.t_above[i] = !!*(uint16_t *)&xd->plane[plane].above_context[i];
679 for (i = 0; i < bh; i += 2)
680 args.t_left[i] = !!*(uint16_t *)&xd->plane[plane].left_context[i];
681 args.scan = vp9_default_scan_8x8;
682 args.nb = vp9_default_scan_8x8_neighbors;
685 for (i = 0; i < bw; i += 4)
686 args.t_above[i] = !!*(uint32_t *)&xd->plane[plane].above_context[i];
687 for (i = 0; i < bh; i += 4)
688 args.t_left[i] = !!*(uint32_t *)&xd->plane[plane].left_context[i];
689 args.scan = vp9_default_scan_16x16;
690 args.nb = vp9_default_scan_16x16_neighbors;
693 for (i = 0; i < bw; i += 8)
694 args.t_above[i] = !!*(uint64_t *)&xd->plane[plane].above_context[i];
695 for (i = 0; i < bh; i += 8)
696 args.t_left[i] = !!*(uint64_t *)&xd->plane[plane].left_context[i];
697 args.scan = vp9_default_scan_32x32;
698 args.nb = vp9_default_scan_32x32_neighbors;
704 foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
708 static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
709 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
712 for (plane = 1; plane < MAX_MB_PLANE; plane++) {
713 cost += rdcost_plane(cm, x, plane, bsize, tx_size);
718 static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
719 int shift, int64_t *sse) {
720 struct macroblockd_plane *p = &x->e_mbd.plane[0];
721 const int bwl = plane_block_width_log2by4(bsize, p);
722 const int bhl = plane_block_height_log2by4(bsize, p);
723 int64_t e = vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
724 16 << (bwl + bhl), sse) >> shift;
729 static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
730 int shift, int64_t *sse) {
731 int64_t sum = 0, this_sse;
735 for (plane = 1; plane < MAX_MB_PLANE; plane++) {
736 struct macroblockd_plane *p = &x->e_mbd.plane[plane];
737 const int bwl = plane_block_width_log2by4(bsize, p);
738 const int bhl = plane_block_height_log2by4(bsize, p);
739 sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
740 16 << (bwl + bhl), &this_sse);
747 static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
748 int ss_txfrm_size, void *arg) {
749 struct rdcost_block_args *args = arg;
750 MACROBLOCK *const x = args->x;
751 MACROBLOCKD *const xd = &x->e_mbd;
752 struct encode_b_args encode_args = {args->cm, x, NULL};
753 int64_t rd1, rd2, rd;
757 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
758 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
760 if (rd > args->best_rd) {
762 args->rate = INT_MAX;
763 args->dist = INT64_MAX;
764 args->sse = INT64_MAX;
768 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
769 encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args);
771 xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args);
773 dist_block(plane, block, bsize, ss_txfrm_size, args);
774 rate_block(plane, block, bsize, ss_txfrm_size, args);
777 static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
778 int *rate, int64_t *distortion,
779 int *skippable, int64_t *sse,
781 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
782 MACROBLOCKD *const xd = &x->e_mbd;
783 struct macroblockd_plane *const pd = &xd->plane[0];
784 const int bwl = plane_block_width_log2by4(bsize, pd);
785 const int bhl = plane_block_height_log2by4(bsize, pd);
786 const int bw = 1 << bwl, bh = 1 << bhl;
788 struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
789 0, 0, 0, ref_best_rd, 0 };
790 xd->mode_info_context->mbmi.txfm_size = tx_size;
793 vpx_memcpy(&args.t_above, pd->above_context,
794 sizeof(ENTROPY_CONTEXT) * bw);
795 vpx_memcpy(&args.t_left, pd->left_context,
796 sizeof(ENTROPY_CONTEXT) * bh);
797 get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, 0),
798 &args.scan, &args.nb);
801 for (i = 0; i < bw; i += 2)
802 args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
803 for (i = 0; i < bh; i += 2)
804 args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
805 get_scan_nb_8x8(get_tx_type_8x8(PLANE_TYPE_Y_WITH_DC, xd),
806 &args.scan, &args.nb);
809 for (i = 0; i < bw; i += 4)
810 args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
811 for (i = 0; i < bh; i += 4)
812 args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
813 get_scan_nb_16x16(get_tx_type_16x16(PLANE_TYPE_Y_WITH_DC, xd),
814 &args.scan, &args.nb);
817 for (i = 0; i < bw; i += 8)
818 args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
819 for (i = 0; i < bh; i += 8)
820 args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
821 args.scan = vp9_default_scan_32x32;
822 args.nb = vp9_default_scan_32x32_neighbors;
828 foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
829 *distortion = args.dist;
832 *skippable = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
835 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
836 int *rate, int64_t *distortion,
837 int *skip, int64_t *sse,
839 BLOCK_SIZE_TYPE bs) {
840 const TX_SIZE max_txfm_size = TX_32X32
841 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
842 VP9_COMMON *const cm = &cpi->common;
843 MACROBLOCKD *const xd = &x->e_mbd;
844 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
845 if (max_txfm_size == TX_32X32 &&
846 (cm->tx_mode == ALLOW_32X32 ||
847 cm->tx_mode == TX_MODE_SELECT)) {
848 mbmi->txfm_size = TX_32X32;
849 } else if (max_txfm_size >= TX_16X16 &&
850 (cm->tx_mode == ALLOW_16X16 ||
851 cm->tx_mode == ALLOW_32X32 ||
852 cm->tx_mode == TX_MODE_SELECT)) {
853 mbmi->txfm_size = TX_16X16;
854 } else if (cm->tx_mode != ONLY_4X4) {
855 mbmi->txfm_size = TX_8X8;
857 mbmi->txfm_size = TX_4X4;
859 super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
860 &sse[mbmi->txfm_size], ref_best_rd, bs,
862 cpi->txfm_stepdown_count[0]++;
865 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
866 int (*r)[2], int *rate,
867 int64_t *d, int64_t *distortion,
869 int64_t txfm_cache[TX_MODES],
870 BLOCK_SIZE_TYPE bs) {
871 const TX_SIZE max_txfm_size = TX_32X32
872 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
873 VP9_COMMON *const cm = &cpi->common;
874 MACROBLOCKD *const xd = &x->e_mbd;
875 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
876 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
877 int64_t rd[TX_SIZES][2];
881 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
883 for (n = TX_4X4; n <= max_txfm_size; n++) {
885 if (r[n][0] == INT_MAX)
887 for (m = 0; m <= n - (n == max_txfm_size); m++) {
889 r[n][1] += vp9_cost_zero(tx_probs[m]);
891 r[n][1] += vp9_cost_one(tx_probs[m]);
895 assert(skip_prob > 0);
896 s0 = vp9_cost_bit(skip_prob, 0);
897 s1 = vp9_cost_bit(skip_prob, 1);
899 for (n = TX_4X4; n <= max_txfm_size; n++) {
900 if (d[n] == INT64_MAX) {
901 rd[n][0] = rd[n][1] = INT64_MAX;
905 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
907 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
908 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
912 if (max_txfm_size == TX_32X32 &&
913 (cm->tx_mode == ALLOW_32X32 ||
914 (cm->tx_mode == TX_MODE_SELECT &&
915 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
916 rd[TX_32X32][1] < rd[TX_4X4][1]))) {
917 mbmi->txfm_size = TX_32X32;
918 } else if (max_txfm_size >= TX_16X16 &&
919 (cm->tx_mode == ALLOW_16X16 ||
920 cm->tx_mode == ALLOW_32X32 ||
921 (cm->tx_mode == TX_MODE_SELECT &&
922 rd[TX_16X16][1] < rd[TX_8X8][1] &&
923 rd[TX_16X16][1] < rd[TX_4X4][1]))) {
924 mbmi->txfm_size = TX_16X16;
925 } else if (cm->tx_mode == ALLOW_8X8 ||
926 cm->tx_mode == ALLOW_16X16 ||
927 cm->tx_mode == ALLOW_32X32 ||
928 (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
929 mbmi->txfm_size = TX_8X8;
931 mbmi->txfm_size = TX_4X4;
934 *distortion = d[mbmi->txfm_size];
935 *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
936 *skip = s[mbmi->txfm_size];
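// Record the best rd seen for each tx mode below so that callers can later
// compare complete tx_mode choices (see, e.g., how rd_pick_intra_sby_mode
// adjusts its rd total using these per-mode entries).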
938 txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
939 txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
940 txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
941 txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
942 if (max_txfm_size == TX_32X32 &&
943 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
944 rd[TX_32X32][1] < rd[TX_4X4][1])
945 txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
946 else if (max_txfm_size >= TX_16X16 &&
947 rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
948 txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
950 txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
951 rd[TX_4X4][1] : rd[TX_8X8][1];
953 if (max_txfm_size == TX_32X32 &&
954 rd[TX_32X32][1] < rd[TX_16X16][1] &&
955 rd[TX_32X32][1] < rd[TX_8X8][1] &&
956 rd[TX_32X32][1] < rd[TX_4X4][1]) {
957 cpi->txfm_stepdown_count[0]++;
958 } else if (max_txfm_size >= TX_16X16 &&
959 rd[TX_16X16][1] < rd[TX_8X8][1] &&
960 rd[TX_16X16][1] < rd[TX_4X4][1]) {
961 cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
962 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
963 cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
965 cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
969 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
970 int (*r)[2], int *rate,
971 int64_t *d, int64_t *distortion,
972 int *s, int *skip, int64_t *sse,
976 const TX_SIZE max_txfm_size = TX_32X32
977 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
978 VP9_COMMON *const cm = &cpi->common;
979 MACROBLOCKD *const xd = &x->e_mbd;
980 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
981 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
982 int64_t rd[TX_SIZES][2];
985 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
986 // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
988 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
990 // for (n = TX_4X4; n <= max_txfm_size; n++)
991 // r[n][0] = (r[n][0] * scale_r[n]);
993 for (n = TX_4X4; n <= max_txfm_size; n++) {
995 for (m = 0; m <= n - (n == max_txfm_size); m++) {
997 r[n][1] += vp9_cost_zero(tx_probs[m]);
999 r[n][1] += vp9_cost_one(tx_probs[m]);
1003 assert(skip_prob > 0);
1004 s0 = vp9_cost_bit(skip_prob, 0);
1005 s1 = vp9_cost_bit(skip_prob, 1);
1007 for (n = TX_4X4; n <= max_txfm_size; n++) {
1009 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
1011 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
1012 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
1015 for (n = TX_4X4; n <= max_txfm_size; n++) {
1016 rd[n][0] = (scale_rd[n] * rd[n][0]);
1017 rd[n][1] = (scale_rd[n] * rd[n][1]);
1020 if (max_txfm_size == TX_32X32 &&
1021 (cm->tx_mode == ALLOW_32X32 ||
1022 (cm->tx_mode == TX_MODE_SELECT &&
1023 rd[TX_32X32][1] <= rd[TX_16X16][1] &&
1024 rd[TX_32X32][1] <= rd[TX_8X8][1] &&
1025 rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
1026 mbmi->txfm_size = TX_32X32;
1027 } else if (max_txfm_size >= TX_16X16 &&
1028 (cm->tx_mode == ALLOW_16X16 ||
1029 cm->tx_mode == ALLOW_32X32 ||
1030 (cm->tx_mode == TX_MODE_SELECT &&
1031 rd[TX_16X16][1] <= rd[TX_8X8][1] &&
1032 rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
1033 mbmi->txfm_size = TX_16X16;
1034 } else if (cm->tx_mode == ALLOW_8X8 ||
1035 cm->tx_mode == ALLOW_16X16 ||
1036 cm->tx_mode == ALLOW_32X32 ||
1037 (cm->tx_mode == TX_MODE_SELECT &&
1038 rd[TX_8X8][1] <= rd[TX_4X4][1])) {
1039 mbmi->txfm_size = TX_8X8;
1041 mbmi->txfm_size = TX_4X4;
1044 if (model_used[mbmi->txfm_size]) {
1045 // Actually encode using the chosen mode if a model was used, but do not
1046 // update the r, d costs
1047 super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
1048 &sse[mbmi->txfm_size], ref_best_rd,
1049 bs, mbmi->txfm_size);
1051 *distortion = d[mbmi->txfm_size];
1052 *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
1053 *skip = s[mbmi->txfm_size];
1056 if (max_txfm_size == TX_32X32 &&
1057 rd[TX_32X32][1] <= rd[TX_16X16][1] &&
1058 rd[TX_32X32][1] <= rd[TX_8X8][1] &&
1059 rd[TX_32X32][1] <= rd[TX_4X4][1]) {
1060 cpi->txfm_stepdown_count[0]++;
1061 } else if (max_txfm_size >= TX_16X16 &&
1062 rd[TX_16X16][1] <= rd[TX_8X8][1] &&
1063 rd[TX_16X16][1] <= rd[TX_4X4][1]) {
1064 cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
1065 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
1066 cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
1068 cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
1072 static void super_block_yrd(VP9_COMP *cpi,
1073 MACROBLOCK *x, int *rate, int64_t *distortion,
1074 int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs,
1075 int64_t txfm_cache[TX_MODES],
1076 int64_t ref_best_rd) {
1077 VP9_COMMON *const cm = &cpi->common;
1078 int r[TX_SIZES][2], s[TX_SIZES];
1079 int64_t d[TX_SIZES], sse[TX_SIZES];
1080 MACROBLOCKD *xd = &x->e_mbd;
1081 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1083 assert(bs == mbmi->sb_type);
1084 if (mbmi->ref_frame[0] > INTRA_FRAME)
1085 vp9_subtract_sby(x, bs);
1087 if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
1088 (cpi->sf.tx_size_search_method != USE_FULL_RD &&
1089 mbmi->ref_frame[0] == INTRA_FRAME)) {
1090 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
1091 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
1094 *psse = sse[mbmi->txfm_size];
1098 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
1099 mbmi->ref_frame[0] > INTRA_FRAME) {
1100 int model_used[TX_SIZES] = {1, 1, 1, 1};
1101 if (bs >= BLOCK_SIZE_SB32X32) {
1102 if (model_used[TX_32X32]) {
1103 model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
1104 &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
1106 super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32],
1107 &s[TX_32X32], &sse[TX_32X32], INT64_MAX,
1111 if (bs >= BLOCK_SIZE_MB16X16) {
1112 if (model_used[TX_16X16]) {
1113 model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
1114 &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
1116 super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16],
1117 &s[TX_16X16], &sse[TX_16X16], INT64_MAX,
1121 if (model_used[TX_8X8]) {
1122 model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
1123 &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
1125 super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
1126 &sse[TX_8X8], INT64_MAX, bs, TX_8X8);
1128 if (model_used[TX_4X4]) {
1129 model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
1130 &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
1132 super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
1133 &sse[TX_4X4], INT64_MAX, bs, TX_4X4);
1135 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
1136 skip, sse, ref_best_rd, bs, model_used);
1138 if (bs >= BLOCK_SIZE_SB32X32)
1139 super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32],
1140 &s[TX_32X32], &sse[TX_32X32], ref_best_rd,
1142 if (bs >= BLOCK_SIZE_MB16X16)
1143 super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16],
1144 &s[TX_16X16], &sse[TX_16X16], ref_best_rd,
1146 super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
1147 &sse[TX_8X8], ref_best_rd, bs, TX_8X8);
1148 super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
1149 &sse[TX_4X4], ref_best_rd, bs, TX_4X4);
1150 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
1151 skip, txfm_cache, bs);
1154 *psse = sse[mbmi->txfm_size];
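// Each oblique intra mode below is only worth searching when the best mode
// found so far is one of its two neighbouring directional modes; e.g. D117
// lies between V and D135, so it is skipped unless one of those is best.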
1157 static int conditional_skipintra(MB_PREDICTION_MODE mode,
1158 MB_PREDICTION_MODE best_intra_mode) {
1159 if (mode == D117_PRED &&
1160 best_intra_mode != V_PRED &&
1161 best_intra_mode != D135_PRED)
1163 if (mode == D63_PRED &&
1164 best_intra_mode != V_PRED &&
1165 best_intra_mode != D45_PRED)
1167 if (mode == D27_PRED &&
1168 best_intra_mode != H_PRED &&
1169 best_intra_mode != D45_PRED)
1171 if (mode == D153_PRED &&
1172 best_intra_mode != H_PRED &&
1173 best_intra_mode != D135_PRED)
1178 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
1179 MB_PREDICTION_MODE *best_mode,
1181 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1182 int *bestrate, int *bestratey,
1183 int64_t *bestdistortion,
1184 BLOCK_SIZE_TYPE bsize,
1185 int64_t rd_thresh) {
1186 MB_PREDICTION_MODE mode;
1187 MACROBLOCKD *xd = &x->e_mbd;
1188 int64_t best_rd = rd_thresh;
1191 VP9_COMMON *const cm = &cpi->common;
1192 struct macroblock_plane *p = &x->plane[0];
1193 struct macroblockd_plane *pd = &xd->plane[0];
1194 const int src_stride = p->src.stride;
1195 const int dst_stride = pd->dst.stride;
1196 uint8_t *src_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib,
1197 p->src.buf, src_stride);
1198 uint8_t *dst_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib,
1199 pd->dst.buf, dst_stride);
1200 int16_t *src_diff, *coeff;
1202 ENTROPY_CONTEXT ta[2], tempa[2];
1203 ENTROPY_CONTEXT tl[2], templ[2];
1204 TX_TYPE tx_type = DCT_DCT;
1205 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1206 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1207 int idx, idy, block;
1208 uint8_t best_dst[8 * 8];
1212 vpx_memcpy(ta, a, sizeof(ta));
1213 vpx_memcpy(tl, l, sizeof(tl));
1214 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
1216 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1219 // Only do the oblique modes if the best so far is
1220 // one of the neighboring directional modes
1221 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1222 if (conditional_skipintra(mode, *best_mode))
1226 rate = bmode_costs[mode];
1229 vpx_memcpy(tempa, ta, sizeof(ta));
1230 vpx_memcpy(templ, tl, sizeof(tl));
1232 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1233 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1235 const int16_t *scan;
1236 uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
1237 uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
1239 block = ib + idy * 2 + idx;
1240 xd->mode_info_context->bmi[block].as_mode = mode;
1241 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
1243 coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
1244 vp9_predict_intra_block(xd, block, 1,
1246 x->skip_encode ? src : dst,
1247 x->skip_encode ? src_stride : dst_stride,
1249 vp9_subtract_block(4, 4, src_diff, 8,
1253 tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
1254 if (tx_type != DCT_DCT) {
1255 vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
1256 x->quantize_b_4x4(x, block, tx_type, 16);
1258 x->fwd_txm4x4(src_diff, coeff, 16);
1259 x->quantize_b_4x4(x, block, tx_type, 16);
1262 scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
1263 ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
1264 tempa + idx, templ + idy, TX_4X4, scan,
1265 vp9_get_coef_neighbors_handle(scan));
1266 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff,
1269 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1272 if (tx_type != DCT_DCT)
1273 vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
1274 dst, pd->dst.stride, tx_type);
1276 xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
1277 dst, pd->dst.stride);
1282 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1284 if (this_rd < best_rd) {
1287 *bestdistortion = distortion;
1290 vpx_memcpy(a, tempa, sizeof(tempa));
1291 vpx_memcpy(l, templ, sizeof(templ));
1292 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1293 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1294 num_4x4_blocks_wide * 4);
1300 if (best_rd >= rd_thresh || x->skip_encode)
1303 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1304 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1305 num_4x4_blocks_wide * 4);
1310 static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
1311 int *Rate, int *rate_y,
1312 int64_t *Distortion, int64_t best_rd) {
1314 MACROBLOCKD *const xd = &mb->e_mbd;
1315 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
1316 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1317 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1320 int64_t distortion = 0;
1322 int64_t total_rd = 0;
1323 ENTROPY_CONTEXT t_above[4], t_left[4];
1325 MODE_INFO *const mic = xd->mode_info_context;
1327 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1328 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1330 bmode_costs = mb->mbmode_cost;
1332 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1333 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1334 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1335 const int mis = xd->mode_info_stride;
1336 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
1337 int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
1338 int64_t UNINITIALIZED_IS_SAFE(d), this_rd;
1341 if (cpi->common.frame_type == KEY_FRAME) {
1342 const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
1343 const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
1344 left_block_mode(mic, i) : DC_PRED;
1346 bmode_costs = mb->y_mode_costs[A][L];
1349 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1350 t_above + idx, t_left + idy,
1352 best_rd - total_rd);
1353 if (this_rd >= best_rd - total_rd)
1356 total_rd += this_rd;
1361 mic->bmi[i].as_mode = best_mode;
1362 for (j = 1; j < num_4x4_blocks_high; ++j)
1363 mic->bmi[i + j * 2].as_mode = best_mode;
1364 for (j = 1; j < num_4x4_blocks_wide; ++j)
1365 mic->bmi[i + j].as_mode = best_mode;
1367 if (total_rd >= best_rd)
1373 *rate_y = tot_rate_y;
1374 *Distortion = distortion;
1375 xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode;
1377 return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
1380 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1381 int *rate, int *rate_tokenonly,
1382 int64_t *distortion, int *skippable,
1383 BLOCK_SIZE_TYPE bsize,
1384 int64_t txfm_cache[TX_MODES],
1386 MB_PREDICTION_MODE mode;
1387 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
1388 MACROBLOCKD *const xd = &x->e_mbd;
1389 int this_rate, this_rate_tokenonly, s;
1390 int64_t this_distortion, this_rd;
1391 TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
1393 int *bmode_costs = x->mbmode_cost;
1395 if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
1396 for (i = 0; i < TX_MODES; i++)
1397 txfm_cache[i] = INT64_MAX;
1400 /* Y Search for intra prediction mode */
1401 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1402 int64_t local_txfm_cache[TX_MODES];
1403 MODE_INFO *const mic = xd->mode_info_context;
1404 const int mis = xd->mode_info_stride;
1406 if (cpi->common.frame_type == KEY_FRAME) {
1407 const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
1408 const MB_PREDICTION_MODE L = xd->left_available ?
1409 left_block_mode(mic, 0) : DC_PRED;
1411 bmode_costs = x->y_mode_costs[A][L];
1413 x->e_mbd.mode_info_context->mbmi.mode = mode;
1415 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1416 bsize, local_txfm_cache, best_rd);
1418 if (this_rate_tokenonly == INT_MAX)
1421 this_rate = this_rate_tokenonly + bmode_costs[mode];
1422 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1424 if (this_rd < best_rd) {
1425 mode_selected = mode;
1427 best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size;
1429 *rate_tokenonly = this_rate_tokenonly;
1430 *distortion = this_distortion;
1434 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1435 for (i = 0; i < TX_MODES; i++) {
1436 int64_t adj_rd = this_rd + local_txfm_cache[i] -
1437 local_txfm_cache[cpi->common.tx_mode];
1438 if (adj_rd < txfm_cache[i]) {
1439 txfm_cache[i] = adj_rd;
1445 x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
1446 x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;
1451 static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
1452 int *rate, int64_t *distortion,
1453 int *skippable, int64_t *sse,
1454 BLOCK_SIZE_TYPE bsize,
1455 TX_SIZE uv_tx_size) {
1456 MACROBLOCKD *const xd = &x->e_mbd;
1458 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
1459 vp9_encode_intra_block_uv(cm, x, bsize);
1461 vp9_xform_quant_sbuv(cm, x, bsize);
1463 *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2,
1464 sse ? sse : &dummy);
1465 *rate = rdcost_uv(cm, x, bsize, uv_tx_size);
1466 *skippable = vp9_sbuv_is_skippable(xd, bsize);
1469 static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
1470 int *rate, int64_t *distortion, int *skippable,
1471 int64_t *sse, BLOCK_SIZE_TYPE bsize) {
1472 MACROBLOCKD *const xd = &x->e_mbd;
1473 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1474 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1476 if (mbmi->ref_frame[0] > INTRA_FRAME)
1477 vp9_subtract_sbuv(x, bsize);
1479 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, sse, bsize,
1483 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1484 int *rate, int *rate_tokenonly,
1485 int64_t *distortion, int *skippable,
1486 BLOCK_SIZE_TYPE bsize) {
1487 MB_PREDICTION_MODE mode;
1488 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
1489 int64_t best_rd = INT64_MAX, this_rd;
1490 int this_rate_tokenonly, this_rate, s;
1491 int64_t this_distortion;
1493 MB_PREDICTION_MODE last_mode = bsize <= BLOCK_SIZE_SB8X8 ?
1494 TM_PRED : cpi->sf.last_chroma_intra_mode;
1496 for (mode = DC_PRED; mode <= last_mode; mode++) {
1497 x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
1498 super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
1499 &this_distortion, &s, NULL, bsize);
1500 this_rate = this_rate_tokenonly +
1501 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
1502 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1504 if (this_rd < best_rd) {
1505 mode_selected = mode;
1508 *rate_tokenonly = this_rate_tokenonly;
1509 *distortion = this_distortion;
1514 x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
1519 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
1520 int *rate, int *rate_tokenonly,
1521 int64_t *distortion, int *skippable,
1522 BLOCK_SIZE_TYPE bsize) {
1525 x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
1526 super_block_uvrd(&cpi->common, x, rate_tokenonly,
1527 distortion, skippable, NULL, bsize);
1528 *rate = *rate_tokenonly +
1529 x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
1530 this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1535 static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
1536 int *rate_uv, int *rate_uv_tokenonly,
1537 int64_t *dist_uv, int *skip_uv,
1538 MB_PREDICTION_MODE *mode_uv) {
1539 MACROBLOCK *const x = &cpi->mb;
1541 // Use an estimated rd for uv_intra based on DC_PRED if the
1542 // appropriate speed flag is set.
1543 if (cpi->sf.use_uv_intra_rd_estimate) {
1544 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1545 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
1547 // Else do a proper rd search for each possible transform size that may
1548 // be considered in the main rd loop.
1550 rd_pick_intra_sbuv_mode(cpi, x,
1551 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1552 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8
1555 *mode_uv = x->e_mbd.mode_info_context->mbmi.uv_mode;
1558 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
1560 MACROBLOCK *const x = &cpi->mb;
1561 MACROBLOCKD *const xd = &x->e_mbd;
1562 const int segment_id = xd->mode_info_context->mbmi.segment_id;
1564 // Don't account for mode here if segment skip is enabled.
1565 if (!vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP)) {
1566 assert(is_inter_mode(mode));
1567 return x->inter_mode_cost[mode_context][mode - NEARESTMV];
1573 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
1574 x->e_mbd.mode_info_context->mbmi.mode = mb;
1575 x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
1578 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1579 BLOCK_SIZE_TYPE bsize,
1581 int mi_row, int mi_col,
1582 int_mv single_newmv[MAX_REF_FRAMES],
1584 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1585 BLOCK_SIZE_TYPE bsize,
1586 int mi_row, int mi_col,
1587 int_mv *tmp_mv, int *rate_mv);
1589 static int labels2mode(MACROBLOCK *x, int i,
1590 MB_PREDICTION_MODE this_mode,
1591 int_mv *this_mv, int_mv *this_second_mv,
1592 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1593 int_mv seg_mvs[MAX_REF_FRAMES],
1594 int_mv *best_ref_mv,
1595 int_mv *second_best_ref_mv,
1596 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
1597 MACROBLOCKD *const xd = &x->e_mbd;
1598 MODE_INFO *const mic = xd->mode_info_context;
1599 MB_MODE_INFO * mbmi = &mic->mbmi;
1600 int cost = 0, thismvcost = 0;
1602 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1603 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1605 /* We have to be careful retrieving previously-encoded motion vectors.
1606 Ones from this macroblock have to be pulled from the BLOCKD array
1607 as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1608 MB_PREDICTION_MODE m;
1610 // The only time we should do costing for a new motion vector or mode
1611 // is when we are on a new label (jbb May 08, 2007)
1612 switch (m = this_mode) {
1614 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1615 thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
1616 102, xd->allow_high_precision_mv);
1617 if (mbmi->ref_frame[1] > 0) {
1618 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1619 thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
1620 mvjcost, mvcost, 102,
1621 xd->allow_high_precision_mv);
1625 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1626 if (mbmi->ref_frame[1] > 0)
1627 this_second_mv->as_int =
1628 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
1631 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1632 if (mbmi->ref_frame[1] > 0)
1633 this_second_mv->as_int =
1634 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
1637 this_mv->as_int = 0;
1638 if (mbmi->ref_frame[1] > 0)
1639 this_second_mv->as_int = 0;
1645 cost = cost_mv_ref(cpi, this_mode,
1646 mbmi->mb_mode_context[mbmi->ref_frame[0]]);
1648 mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1649 if (mbmi->ref_frame[1] > 0)
1650 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1652 x->partition_info->bmi[i].mode = m;
1653 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1654 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1655 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1656 &mic->bmi[i], sizeof(mic->bmi[i]));
1662 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1667 int64_t *distortion, int64_t *sse,
1668 ENTROPY_CONTEXT *ta,
1669 ENTROPY_CONTEXT *tl) {
1671 VP9_COMMON *const cm = &cpi->common;
1672 MACROBLOCKD *xd = &x->e_mbd;
1673 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
1674 const int width = plane_block_width(bsize, &xd->plane[0]);
1675 const int height = plane_block_height(bsize, &xd->plane[0]);
1677 const int src_stride = x->plane[0].src.stride;
1678 uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1679 x->plane[0].src.buf,
1681 int16_t* src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
1682 x->plane[0].src_diff);
1683 int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
1684 uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1685 xd->plane[0].pre[0].buf,
1686 xd->plane[0].pre[0].stride);
1687 uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1688 xd->plane[0].dst.buf,
1689 xd->plane[0].dst.stride);
1690 int64_t thisdistortion = 0, thissse = 0;
1693 vp9_build_inter_predictor(pre,
1694 xd->plane[0].pre[0].stride,
1696 xd->plane[0].dst.stride,
1697 &xd->mode_info_context->bmi[i].as_mv[0],
1698 &xd->scale_factor[0],
1699 width, height, 0, &xd->subpix,
1702 if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
1703 uint8_t* const second_pre =
1704 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1705 xd->plane[0].pre[1].buf,
1706 xd->plane[0].pre[1].stride);
1707 vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
1708 dst, xd->plane[0].dst.stride,
1709 &xd->mode_info_context->bmi[i].as_mv[1],
1710 &xd->scale_factor[1],
1712 &xd->subpix, MV_PRECISION_Q3);
1715 vp9_subtract_block(height, width, src_diff, 8,
1717 dst, xd->plane[0].dst.stride);
1720 for (idy = 0; idy < height / 4; ++idy) {
1721 for (idx = 0; idx < width / 4; ++idx) {
1722 int64_t ssz, rd, rd1, rd2;
1724 k += (idy * 2 + idx);
1725 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
1726 x->plane[0].src_diff);
1727 coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
1728 x->fwd_txm4x4(src_diff, coeff, 16);
1729 x->quantize_b_4x4(x, k, DCT_DCT, 16);
1730 thisdistortion += vp9_block_error(coeff,
1731 BLOCK_OFFSET(xd->plane[0].dqcoeff,
1734 thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
1736 tl + (k >> 1), TX_4X4,
1737 vp9_default_scan_4x4,
1738 vp9_default_scan_4x4_neighbors);
1739 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1740 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1746 *distortion = thisdistortion >> 2;
1747 *labelyrate = thisrate;
1748 *sse = thissse >> 2;
1750 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1761 ENTROPY_CONTEXT ta[2];
1762 ENTROPY_CONTEXT tl[2];
1766 int_mv *ref_mv, *second_ref_mv;
1774 MB_PREDICTION_MODE modes[4];
1775 SEG_RDSTAT rdstat[4][VP9_INTER_MODES];
1779 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1781 r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1782 r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1783 r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1784 r |= (mv->as_mv.col >> 3) > x->mv_col_max;
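// The >> 3 above converts the 1/8-pel mv components to full-pel units
// before comparing against the row/col search limits.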
1788 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1789 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1790 x->plane[0].src.buf =
1791 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1792 x->plane[0].src.buf,
1793 x->plane[0].src.stride);
1794 assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
1795 x->e_mbd.plane[0].pre[0].buf =
1796 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1797 x->e_mbd.plane[0].pre[0].buf,
1798 x->e_mbd.plane[0].pre[0].stride);
1799 if (mbmi->ref_frame[1])
1800 x->e_mbd.plane[0].pre[1].buf =
1801 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1802 x->e_mbd.plane[0].pre[1].buf,
1803 x->e_mbd.plane[0].pre[1].stride);
1806 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1807 struct buf_2d orig_pre[2]) {
1808 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1809 x->plane[0].src = orig_src;
1810 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1811 if (mbmi->ref_frame[1])
1812 x->e_mbd.plane[0].pre[1] = orig_pre[1];
1815 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1816 BEST_SEG_INFO *bsi_buf, int filter_idx,
1817 int_mv seg_mvs[4][MAX_REF_FRAMES],
1818 int mi_row, int mi_col) {
1819 int i, j, br = 0, idx, idy;
1820 int64_t bd = 0, block_sse = 0;
1821 MB_PREDICTION_MODE this_mode;
1822 MODE_INFO *mi = x->e_mbd.mode_info_context;
1823 MB_MODE_INFO *const mbmi = &mi->mbmi;
1824 const int label_count = 4;
1825 int64_t this_segment_rd = 0;
1826 int label_mv_thresh;
1827 int segmentyrate = 0;
1828 BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
1829 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1830 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1831 vp9_variance_fn_ptr_t *v_fn_ptr;
1832 ENTROPY_CONTEXT t_above[2], t_left[2];
1833 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1835 int subpelmv = 1, have_ref = 0;
1837 vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
1838 vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
1840 v_fn_ptr = &cpi->fn_ptr[bsize];
1842 // A factor of 64 makes this threshold effectively very large, so that
1843 // mvs are very rarely checked on segments; setting the factor to 1 makes
1844 // the mv threshold roughly equal to what it is for macroblocks.
1846 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1848 // Segmentation method overheads
1849 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1850 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1851 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1852 // loop for 4x4/4x8/8x4 block coding. To be replaced with a new rd loop.
1853 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1854 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1855 MB_PREDICTION_MODE mode_selected = ZEROMV;
1856 int64_t best_rd = INT64_MAX;
1859 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
1860 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
1861 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1862 &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
1863 &frame_mv[NEARMV][mbmi->ref_frame[0]],
1865 if (mbmi->ref_frame[1] > 0)
1866 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1867 &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
1868 &frame_mv[NEARMV][mbmi->ref_frame[1]],
1871 // search for the best motion vector on this segment
1872 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1873 const struct buf_2d orig_src = x->plane[0].src;
1874 struct buf_2d orig_pre[2];
1876 mode_idx = inter_mode_offset(this_mode);
1877 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1879 // If we're in a near/nearest mode and the mv is (0,0), compare the cost to ZEROMV.
1880 if ((this_mode == NEARMV || this_mode == NEARESTMV ||
1881 this_mode == ZEROMV) &&
1882 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
1883 (mbmi->ref_frame[1] <= 0 ||
1884 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
1885 int rfc = mbmi->mb_mode_context[mbmi->ref_frame[0]];
1886 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1887 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1888 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1890 if (this_mode == NEARMV) {
1893 } else if (this_mode == NEARESTMV) {
1897 assert(this_mode == ZEROMV);
1898 if (mbmi->ref_frame[1] <= 0) {
1900 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
1902 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
1906 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
1907 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
1909 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
1910 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
1916 vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
1917 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1918 sizeof(bsi->rdstat[i][mode_idx].ta));
1919 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1920 sizeof(bsi->rdstat[i][mode_idx].tl));
1922 // motion search for newmv (single predictor case only)
1923 if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV &&
1924 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1927 int thissme, bestsme = INT_MAX;
1928 int sadpb = x->sadperbit4;
1931 /* Is the best so far sufficiently good that we can't justify doing
1932 * a new motion search? */
1933 if (best_rd < label_mv_thresh)
1936 if (cpi->compressor_speed) {
1937 // use previous block's result as next block's MV predictor.
1940 x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
1943 x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
1946 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
1947 // Take a weighted average of the step_params based on the last frame's
1948 // max mv magnitude and the best ref mvs of the current block for
1949 // the given reference.
1951 step_param = (vp9_init_search_range(
1952 cpi, x->max_mv_context[mbmi->ref_frame[0]]) +
1953 cpi->mv_step_param) >> 1;
1955 step_param = (vp9_init_search_range(
1956 cpi, MAX(abs(bsi->mvp.as_mv.row),
1957 abs(bsi->mvp.as_mv.col)) >> 3) +
1958 cpi->mv_step_param) >> 1;
1960 step_param = cpi->mv_step_param;
1963 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1965 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1966 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1968 // adjust src pointer for this block
1970 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1971 sadpb, further_steps, 0, v_fn_ptr,
1972 bsi->ref_mv, &mode_mv[NEWMV]);
1974 // Should we do a full search (best quality only)?
1975 if (cpi->compressor_speed == 0) {
1976 /* Check if mvp_full is within the range. */
1977 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1978 x->mv_row_min, x->mv_row_max);
1980 thissme = cpi->full_search_sad(x, &mvp_full,
1981 sadpb, 16, v_fn_ptr,
1982 x->nmvjointcost, x->mvcost,
1985 if (thissme < bestsme) {
1987 mode_mv[NEWMV].as_int =
1988 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
1990 /* The full search result is actually worse, so reinstate the
1991 * previous best vector */
1992 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
1993 mode_mv[NEWMV].as_int;
1997 if (bestsme < INT_MAX) {
2000 cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
2001 bsi->ref_mv, x->errorperbit, v_fn_ptr,
2002 x->nmvjointcost, x->mvcost,
2005 // Save the motion search result for use in compound prediction.
2006 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
2009 // restore src pointers
2010 mi_buf_restore(x, orig_src, orig_pre);
2013 if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV &&
2014 mbmi->interp_filter == vp9_switchable_interp[0]) {
2015 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
2016 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
2019 // adjust src pointers
2021 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2023 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
2024 mi_row, mi_col, seg_mvs[i],
2026 seg_mvs[i][mbmi->ref_frame[0]].as_int =
2027 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
2028 seg_mvs[i][mbmi->ref_frame[1]].as_int =
2029 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
2031 // restore src pointers
2032 mi_buf_restore(x, orig_src, orig_pre);
2035 bsi->rdstat[i][mode_idx].brate =
2036 labels2mode(x, i, this_mode, &mode_mv[this_mode],
2037 &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
2038 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
2041 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
2042 if (num_4x4_blocks_wide > 1)
2043 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
2044 mode_mv[this_mode].as_int;
2045 if (num_4x4_blocks_high > 1)
2046 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
2047 mode_mv[this_mode].as_int;
2048 if (mbmi->ref_frame[1] > 0) {
2049 bsi->rdstat[i][mode_idx].mvs[1].as_int =
2050 second_mode_mv[this_mode].as_int;
2051 if (num_4x4_blocks_wide > 1)
2052 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
2053 second_mode_mv[this_mode].as_int;
2054 if (num_4x4_blocks_high > 1)
2055 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
2056 second_mode_mv[this_mode].as_int;
2059 // Trap vectors that reach beyond the UMV borders
2060 if (mv_check_bounds(x, &mode_mv[this_mode]))
2062 if (mbmi->ref_frame[1] > 0 &&
2063 mv_check_bounds(x, &second_mode_mv[this_mode]))
2066 if (filter_idx > 0) {
2067 BEST_SEG_INFO *ref_bsi = bsi_buf;
2068 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) ||
2069 (mode_mv[this_mode].as_mv.col & 0x0f);
2070 have_ref = mode_mv[this_mode].as_int ==
2071 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
2072 if (mbmi->ref_frame[1] > 0) {
2073 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) ||
2074 (second_mode_mv[this_mode].as_mv.col & 0x0f);
2075 have_ref &= second_mode_mv[this_mode].as_int ==
2076 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
2079 if (filter_idx > 1 && !subpelmv && !have_ref) {
2080 ref_bsi = bsi_buf + 1;
2081 have_ref = mode_mv[this_mode].as_int ==
2082 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
2083 if (mbmi->ref_frame[1] > 0) {
2084 have_ref &= second_mode_mv[this_mode].as_int ==
2085 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
2089 if (!subpelmv && have_ref &&
2090 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2091 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
2092 sizeof(SEG_RDSTAT));
2093 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2094 mode_selected = this_mode;
2095 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2101 bsi->rdstat[i][mode_idx].brdcost =
2102 encode_inter_mb_segment(cpi, x,
2103 bsi->segment_rd - this_segment_rd, i,
2104 &bsi->rdstat[i][mode_idx].byrate,
2105 &bsi->rdstat[i][mode_idx].bdist,
2106 &bsi->rdstat[i][mode_idx].bsse,
2107 bsi->rdstat[i][mode_idx].ta,
2108 bsi->rdstat[i][mode_idx].tl);
2109 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2110 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2111 bsi->rdstat[i][mode_idx].brate, 0);
2112 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2113 bsi->rdstat[i][mode_idx].eobs = x->e_mbd.plane[0].eobs[i];
2116 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2117 mode_selected = this_mode;
2118 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2120 } /*for each 4x4 mode*/
2122 if (best_rd == INT64_MAX) {
2124 for (iy = i + 1; iy < 4; ++iy)
2125 for (midx = 0; midx < VP9_INTER_MODES; ++midx)
2126 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2127 bsi->segment_rd = INT64_MAX;
2131 mode_idx = inter_mode_offset(mode_selected);
2132 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2133 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2135 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
2136 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
2137 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
2140 br += bsi->rdstat[i][mode_idx].brate;
2141 bd += bsi->rdstat[i][mode_idx].bdist;
2142 block_sse += bsi->rdstat[i][mode_idx].bsse;
2143 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2144 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2146 if (this_segment_rd > bsi->segment_rd) {
2148 for (iy = i + 1; iy < 4; ++iy)
2149 for (midx = 0; midx < VP9_INTER_MODES; ++midx)
2150 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2151 bsi->segment_rd = INT64_MAX;
2155 for (j = 1; j < num_4x4_blocks_high; ++j)
2156 vpx_memcpy(&x->partition_info->bmi[i + j * 2],
2157 &x->partition_info->bmi[i],
2158 sizeof(x->partition_info->bmi[i]));
2159 for (j = 1; j < num_4x4_blocks_wide; ++j)
2160 vpx_memcpy(&x->partition_info->bmi[i + j],
2161 &x->partition_info->bmi[i],
2162 sizeof(x->partition_info->bmi[i]));
2164 } /* for each label */
2168 bsi->segment_yrate = segmentyrate;
2169 bsi->segment_rd = this_segment_rd;
2170 bsi->sse = block_sse;
2172 // update the coding decisions
2173 for (i = 0; i < 4; ++i)
2174 bsi->modes[i] = x->partition_info->bmi[i].mode;
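/*
 * Illustrative aside: the per-label loop above keeps a running total of the
 * sub-block RD costs and abandons the whole segmentation as soon as that
 * total exceeds the best segment cost found so far.  A stand-alone sketch
 * of that early-out accumulation (hypothetical names):
 */
static int64_t example_accumulate_with_early_out(const int64_t *block_rd,
                                                 int num_blocks,
                                                 int64_t best_so_far) {
  int64_t total = 0;
  int i;
  for (i = 0; i < num_blocks; ++i) {
    total += block_rd[i];
    if (total > best_so_far)
      return INT64_MAX;  /* caller treats this as "worse than best" */
  }
  return total;
}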
2177 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2178 int_mv *best_ref_mv,
2179 int_mv *second_best_ref_mv,
2183 int64_t *returndistortion,
2184 int *skippable, int64_t *psse,
2186 int_mv seg_mvs[4][MAX_REF_FRAMES],
2187 BEST_SEG_INFO *bsi_buf,
2189 int mi_row, int mi_col) {
2191 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2192 MACROBLOCKD *xd = &x->e_mbd;
2193 MODE_INFO *mi = xd->mode_info_context;
2194 MB_MODE_INFO *mbmi = &mi->mbmi;
2199 bsi->segment_rd = best_rd;
2200 bsi->ref_mv = best_ref_mv;
2201 bsi->second_ref_mv = second_best_ref_mv;
2202 bsi->mvp.as_int = best_ref_mv->as_int;
2203 bsi->mvthresh = mvthresh;
2205 for (i = 0; i < 4; i++)
2206 bsi->modes[i] = ZEROMV;
2208 rd_check_segment_txsize(cpi, x, bsi_buf, filter_idx, seg_mvs, mi_row, mi_col);
2210 if (bsi->segment_rd > best_rd)
2212 /* set it to the best */
2213 for (i = 0; i < 4; i++) {
2214 mode_idx = inter_mode_offset(bsi->modes[i]);
2215 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2216 if (mbmi->ref_frame[1] > 0)
2217 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2218 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2219 x->partition_info->bmi[i].mode = bsi->modes[i];
2223 * used to set mbmi->mv.as_int
2225 *returntotrate = bsi->r;
2226 *returndistortion = bsi->d;
2227 *returnyrate = bsi->segment_yrate;
2228 *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
2230 mbmi->mode = bsi->modes[3];
2232 return bsi->segment_rd;
2235 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2236 uint8_t *ref_y_buffer, int ref_y_stride,
2237 int ref_frame, BLOCK_SIZE_TYPE block_size ) {
2238 MACROBLOCKD *xd = &x->e_mbd;
2239 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2244 int best_sad = INT_MAX;
2245 int this_sad = INT_MAX;
2246 unsigned int max_mv = 0;
2248 uint8_t *src_y_ptr = x->plane[0].src.buf;
2250 int row_offset, col_offset;
2252 // Get the sad for each candidate reference mv
2253 for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
2254 this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
2256 max_mv = MAX(max_mv,
2257 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2258 // The list is at its end once we see 0 for a second time.
2259 if (!this_mv.as_int && zero_seen)
2261 zero_seen = zero_seen || !this_mv.as_int;
2263 row_offset = this_mv.as_mv.row >> 3;
2264 col_offset = this_mv.as_mv.col >> 3;
2265 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2267 // Find sad for current vector.
2268 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2269 ref_y_ptr, ref_y_stride,
2272 // Note if it is the best so far.
2273 if (this_sad < best_sad) {
2274 best_sad = this_sad;
2279 // Note the index of the mv that worked best in the reference list.
2280 x->mv_best_ref_index[ref_frame] = best_index;
2281 x->max_mv_context[ref_frame] = max_mv;
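/*
 * Illustrative aside: mv_pred() above picks, from a short list of candidate
 * reference mvs, the one whose displaced reference block has the smallest
 * SAD against the source.  A hypothetical stand-alone version of that loop
 * (sad_fn stands in for the cpi->fn_ptr[...].sdf call; <limits.h> assumed):
 */
static int example_pick_best_candidate(const uint8_t *src, int src_stride,
                                       const uint8_t *ref, int ref_stride,
                                       const int (*cand_rc)[2], int num_cand,
                                       unsigned int (*sad_fn)(const uint8_t *,
                                                              int,
                                                              const uint8_t *,
                                                              int)) {
  unsigned int best_sad = UINT_MAX;
  int best_index = 0;
  int i;
  for (i = 0; i < num_cand; ++i) {
    /* Offset the reference by the candidate's full-pel row/col displacement. */
    const uint8_t *p = ref + cand_rc[i][0] * ref_stride + cand_rc[i][1];
    const unsigned int sad = sad_fn(src, src_stride, p, ref_stride);
    if (sad < best_sad) {
      best_sad = sad;
      best_index = i;
    }
  }
  return best_index;
}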
2284 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2285 unsigned int *ref_costs_single,
2286 unsigned int *ref_costs_comp,
2287 vp9_prob *comp_mode_p) {
2288 VP9_COMMON *const cm = &cpi->common;
2289 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2290 int seg_ref_active = vp9_segfeature_active(&xd->seg, segment_id,
2292 if (seg_ref_active) {
2293 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2294 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2297 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
2298 vp9_prob comp_inter_p = 128;
2300 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
2301 comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
2302 *comp_mode_p = comp_inter_p;
2307 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2309 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
2310 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2311 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2312 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2314 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2315 base_cost += vp9_cost_bit(comp_inter_p, 0);
2317 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2318 ref_costs_single[ALTREF_FRAME] = base_cost;
2319 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2320 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2321 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2322 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2323 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2325 ref_costs_single[LAST_FRAME] = 512;
2326 ref_costs_single[GOLDEN_FRAME] = 512;
2327 ref_costs_single[ALTREF_FRAME] = 512;
2329 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
2330 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2331 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2333 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2334 base_cost += vp9_cost_bit(comp_inter_p, 1);
2336 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2337 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2339 ref_costs_comp[LAST_FRAME] = 512;
2340 ref_costs_comp[GOLDEN_FRAME] = 512;
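/*
 * Illustrative aside: the reference-frame costs above are built by walking a
 * small binary tree and summing the cost of each decision bit.  A
 * stand-alone sketch of that accumulation, where cost_bit() is a stand-in
 * for vp9_cost_bit() and the tree layout mirrors the single-reference case
 * above (LAST vs {GOLDEN, ALTREF}, then GOLDEN vs ALTREF):
 */
static void example_single_ref_costs(unsigned int (*cost_bit)(int prob,
                                                              int bit),
                                     int p1, int p2, unsigned int base_cost,
                                     unsigned int costs[3]) {
  costs[0] = base_cost + cost_bit(p1, 0);                    /* LAST   */
  costs[1] = base_cost + cost_bit(p1, 1) + cost_bit(p2, 0);  /* GOLDEN */
  costs[2] = base_cost + cost_bit(p1, 1) + cost_bit(p2, 1);  /* ALTREF */
}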
2345 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2347 PARTITION_INFO *partition,
2349 int_mv *second_ref_mv,
2350 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
2351 int64_t txfm_size_diff[TX_MODES],
2352 int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) {
2353 MACROBLOCKD *const xd = &x->e_mbd;
2355 // Take a snapshot of the coding context so it can be
2356 // restored if we decide to encode this way
2357 ctx->skip = x->skip;
2358 ctx->best_mode_index = mode_index;
2359 ctx->mic = *xd->mode_info_context;
2362 ctx->partition_info = *partition;
2364 ctx->best_ref_mv.as_int = ref_mv->as_int;
2365 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
2367 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
2368 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
2369 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
2371 // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
2372 // doesn't actually work this way
2373 memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
2374 memcpy(ctx->best_filter_diff, best_filter_diff,
2375 sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1));
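/*
 * Note on the FIXME above: assuming txfm_rd_diff is declared as a true array
 * member of the context struct (e.g. int64_t txfm_rd_diff[TX_MODES]),
 * sizeof(ctx->txfm_rd_diff) yields the size of the whole array, so the
 * memcpy does copy every element; sizeof only degenerates to the size of a
 * pointer when the operand is a pointer.  A minimal demonstration with
 * hypothetical types:
 */
static void example_sizeof_array_member(void) {
  struct example_ctx {
    int64_t diff[4];
  } dst;
  const int64_t src[4] = {1, 2, 3, 4};
  /* sizeof(dst.diff) == 4 * sizeof(int64_t), so all four entries are copied. */
  memcpy(dst.diff, src, sizeof(dst.diff));
}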
2378 static void setup_pred_block(const MACROBLOCKD *xd,
2379 struct buf_2d dst[MAX_MB_PLANE],
2380 const YV12_BUFFER_CONFIG *src,
2381 int mi_row, int mi_col,
2382 const struct scale_factors *scale,
2383 const struct scale_factors *scale_uv) {
2386 dst[0].buf = src->y_buffer;
2387 dst[0].stride = src->y_stride;
2388 dst[1].buf = src->u_buffer;
2389 dst[2].buf = src->v_buffer;
2390 dst[1].stride = dst[2].stride = src->uv_stride;
2392 dst[3].buf = src->alpha_buffer;
2393 dst[3].stride = src->alpha_stride;
2396 // TODO(jkoleszar): Make scale factors per-plane data
2397 for (i = 0; i < MAX_MB_PLANE; i++) {
2398 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2399 i ? scale_uv : scale,
2400 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2404 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2405 int idx, MV_REFERENCE_FRAME frame_type,
2406 BLOCK_SIZE_TYPE block_size,
2407 int mi_row, int mi_col,
2408 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2409 int_mv frame_near_mv[MAX_REF_FRAMES],
2410 struct buf_2d yv12_mb[4][MAX_MB_PLANE],
2411 struct scale_factors scale[MAX_REF_FRAMES]) {
2412 VP9_COMMON *cm = &cpi->common;
2413 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
2414 MACROBLOCKD *const xd = &x->e_mbd;
2415 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2417 // set up scaling factors
2418 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
2420 scale[frame_type].x_offset_q4 =
2421 ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
2422 VP9_REF_SCALE_SHIFT) & 0xf;
2423 scale[frame_type].y_offset_q4 =
2424 ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
2425 VP9_REF_SCALE_SHIFT) & 0xf;
2427 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2428 // use the UV scaling factors.
2429 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
2430 &scale[frame_type], &scale[frame_type]);
2432 // Gets an initial list of candidate vectors from neighbours and orders them
2433 vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
2434 xd->prev_mode_info_context,
2436 mbmi->ref_mvs[frame_type],
2437 cpi->common.ref_frame_sign_bias);
2439 // Candidate refinement carried out at encoder and decoder
2440 vp9_find_best_ref_mvs(xd,
2441 mbmi->ref_mvs[frame_type],
2442 &frame_nearest_mv[frame_type],
2443 &frame_near_mv[frame_type]);
2445 // Further refinement that is encoder-side only: test the top few candidates
2446 // in full and choose the best as the centre point for subsequent searches.
2447 // The current implementation doesn't support scaling.
2448 if (scale[frame_type].x_scale_fp == VP9_REF_NO_SCALE &&
2449 scale[frame_type].y_scale_fp == VP9_REF_NO_SCALE)
2450 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
2451 frame_type, block_size);
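/*
 * Illustrative aside: the x_offset_q4/y_offset_q4 computation above scales
 * the block position by the reference's fixed-point scale factor, rounds,
 * and keeps only the low four bits, i.e. the 1/16-pel fractional part of
 * the scaled position.  A hypothetical stand-alone form (shift plays the
 * role of VP9_REF_SCALE_SHIFT):
 */
static int example_scaled_offset_q4(int pos, int scale_fp, int shift) {
  const int scaled = (pos * scale_fp + (1 << (shift - 1))) >> shift;  /* rounding shift */
  return scaled & 0xf;  /* fractional (q4) part only */
}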
2454 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
2455 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
2456 int fb = get_ref_frame_idx(cpi, ref_frame);
2457 if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb])
2458 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]];
2459 return scaled_ref_frame;
2462 static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
2463 MACROBLOCKD *xd = &x->e_mbd;
2464 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2466 const int c = vp9_get_pred_context_switchable_interp(xd);
2467 const int m = vp9_switchable_interp_map[mbmi->interp_filter];
2468 return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
2471 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2472 BLOCK_SIZE_TYPE bsize,
2473 int mi_row, int mi_col,
2474 int_mv *tmp_mv, int *rate_mv) {
2475 MACROBLOCKD *xd = &x->e_mbd;
2476 VP9_COMMON *cm = &cpi->common;
2477 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2478 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2479 int bestsme = INT_MAX;
2480 int further_steps, step_param;
2481 int sadpb = x->sadperbit16;
2483 int ref = mbmi->ref_frame[0];
2484 int_mv ref_mv = mbmi->ref_mvs[ref][0];
2485 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2487 int tmp_col_min = x->mv_col_min;
2488 int tmp_col_max = x->mv_col_max;
2489 int tmp_row_min = x->mv_row_min;
2490 int tmp_row_max = x->mv_row_max;
2492 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
2494 if (scaled_ref_frame) {
2496 // Swap out the reference frame for a version that's been scaled to
2497 // match the resolution of the current frame, allowing the existing
2498 // motion search code to be used without additional modifications.
2499 for (i = 0; i < MAX_MB_PLANE; i++)
2500 backup_yv12[i] = xd->plane[i].pre[0];
2502 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2505 vp9_clamp_mv_min_max(x, &ref_mv);
2507 // Adjust search parameters based on small partitions' result.
2509 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
2510 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
2511 // adjust search range
2516 // Get prediction MV.
2517 mvp_full.as_int = x->pred_mv.as_int;
2519 // Adjust MV sign if needed.
2520 if (cm->ref_frame_sign_bias[ref]) {
2521 mvp_full.as_mv.col *= -1;
2522 mvp_full.as_mv.row *= -1;
2525 // Work out the size of the first step in the mv step search.
2526 // 0 here means the maximum-length first step; 1 is MAX >> 1, etc.
2527 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2528 // Take a weighted average of the step_params based on the last frame's
2529 // max mv magnitude and that based on the best ref mvs of the current
2530 // block for the given reference.
2531 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2532 cpi->mv_step_param) >> 1;
2534 step_param = cpi->mv_step_param;
2536 // mvp_full.as_int = ref_mv[0].as_int;
2538 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
2541 mvp_full.as_mv.col >>= 3;
2542 mvp_full.as_mv.row >>= 3;
2544 // Further step/diamond searches as necessary
2545 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2547 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2548 sadpb, further_steps, 1,
2549 &cpi->fn_ptr[block_size],
2552 x->mv_col_min = tmp_col_min;
2553 x->mv_col_max = tmp_col_max;
2554 x->mv_row_min = tmp_row_min;
2555 x->mv_row_max = tmp_row_max;
2557 if (bestsme < INT_MAX) {
2558 int dis; /* TODO: use dis in distortion calculation later. */
2560 cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
2562 &cpi->fn_ptr[block_size],
2563 x->nmvjointcost, x->mvcost,
2566 *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
2567 x->nmvjointcost, x->mvcost,
2568 96, xd->allow_high_precision_mv);
2569 if (scaled_ref_frame) {
2571 for (i = 0; i < MAX_MB_PLANE; i++)
2572 xd->plane[i].pre[0] = backup_yv12[i];
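/*
 * Illustrative aside: single_motion_search() above is a two-stage search --
 * an integer-pel diamond search seeded by a predictor, followed by
 * fractional-pel refinement only when the integer stage returned a usable
 * error (bestsme < INT_MAX).  Skeleton of that control flow with
 * hypothetical search callbacks:
 */
static int example_two_stage_search(int (*full_pel_search)(int *row, int *col),
                                    int (*sub_pel_refine)(int *row, int *col)) {
  int row = 0, col = 0;
  int err = full_pel_search(&row, &col);
  if (err < INT_MAX)
    err = sub_pel_refine(&row, &col);  /* refine only when worthwhile */
  return err;
}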
2576 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2577 BLOCK_SIZE_TYPE bsize,
2579 int mi_row, int mi_col,
2580 int_mv single_newmv[MAX_REF_FRAMES],
2582 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
2583 MACROBLOCKD *xd = &x->e_mbd;
2584 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2585 int refs[2] = { mbmi->ref_frame[0],
2586 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2588 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2590 // Prediction buffer from second frame.
2591 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2593 // Do joint motion search in compound mode to get more accurate mv.
2594 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2595 struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
2596 struct buf_2d scaled_first_yv12;
2597 int last_besterr[2] = {INT_MAX, INT_MAX};
2598 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
2599 scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
2600 scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]);
2602 ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
2603 ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
2605 if (scaled_ref_frame[0]) {
2607 // Swap out the reference frame for a version that's been scaled to
2608 // match the resolution of the current frame, allowing the existing
2609 // motion search code to be used without additional modifications.
2610 for (i = 0; i < MAX_MB_PLANE; i++)
2611 backup_yv12[i] = xd->plane[i].pre[0];
2612 setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL);
2615 if (scaled_ref_frame[1]) {
2617 for (i = 0; i < MAX_MB_PLANE; i++)
2618 backup_second_yv12[i] = xd->plane[i].pre[1];
2620 setup_pre_planes(xd, 0, scaled_ref_frame[1], mi_row, mi_col, NULL);
2623 xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
2625 xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
2627 scaled_first_yv12 = xd->plane[0].pre[0];
2629 // Initialize mv using single prediction mode result.
2630 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2631 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2633 // Allow the joint search to run multiple times, iterating over each ref
2634 // frame, and break out of the search loop if it can't find a better mv.
2635 for (ite = 0; ite < 4; ite++) {
2636 struct buf_2d ref_yv12[2];
2637 int bestsme = INT_MAX;
2638 int sadpb = x->sadperbit16;
2640 int search_range = 3;
2642 int tmp_col_min = x->mv_col_min;
2643 int tmp_col_max = x->mv_col_max;
2644 int tmp_row_min = x->mv_row_min;
2645 int tmp_row_max = x->mv_row_max;
2648 // Initialized here because of a compiler problem in Visual Studio.
2649 ref_yv12[0] = xd->plane[0].pre[0];
2650 ref_yv12[1] = xd->plane[0].pre[1];
2652 // Get pred block from second frame.
2653 vp9_build_inter_predictor(ref_yv12[!id].buf,
2654 ref_yv12[!id].stride,
2656 &frame_mv[refs[!id]],
2657 &xd->scale_factor[!id],
2659 &xd->subpix, MV_PRECISION_Q3);
2661 // Compound motion search on first ref frame.
2663 xd->plane[0].pre[0] = ref_yv12[id];
2664 vp9_clamp_mv_min_max(x, &ref_mv[id]);
2666 // Use mv result from single mode as mvp.
2667 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2669 tmp_mv.as_mv.col >>= 3;
2670 tmp_mv.as_mv.row >>= 3;
2672 // Small-range full-pixel motion search
2673 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2675 &cpi->fn_ptr[block_size],
2676 x->nmvjointcost, x->mvcost,
2677 &ref_mv[id], second_pred,
2680 x->mv_col_min = tmp_col_min;
2681 x->mv_col_max = tmp_col_max;
2682 x->mv_row_min = tmp_row_min;
2683 x->mv_row_max = tmp_row_max;
2685 if (bestsme < INT_MAX) {
2686 int dis; /* TODO: use dis in distortion calculation later. */
2689 bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
2692 &cpi->fn_ptr[block_size],
2693 x->nmvjointcost, x->mvcost,
2694 &dis, &sse, second_pred,
2699 xd->plane[0].pre[0] = scaled_first_yv12;
2701 if (bestsme < last_besterr[id]) {
2702 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2703 last_besterr[id] = bestsme;
2709 // restore the predictor
2710 if (scaled_ref_frame[0]) {
2712 for (i = 0; i < MAX_MB_PLANE; i++)
2713 xd->plane[i].pre[0] = backup_yv12[i];
2716 if (scaled_ref_frame[1]) {
2718 for (i = 0; i < MAX_MB_PLANE; i++)
2719 xd->plane[i].pre[1] = backup_second_yv12[i];
2721 *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2722 &mbmi->ref_mvs[refs[0]][0],
2723 x->nmvjointcost, x->mvcost, 96,
2724 x->e_mbd.allow_high_precision_mv);
2725 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2726 &mbmi->ref_mvs[refs[1]][0],
2727 x->nmvjointcost, x->mvcost, 96,
2728 x->e_mbd.allow_high_precision_mv);
2730 vpx_free(second_pred);
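/*
 * Illustrative aside: the compound search above alternates between the two
 * references -- hold one motion vector fixed, build its prediction, refine
 * the other against the compound predictor, and stop iterating once a pass
 * fails to improve.  Skeleton of that alternation with a hypothetical
 * refine() callback returning the new error for the refined reference:
 */
static void example_alternating_refine(int err[2], int (*refine)(int which)) {
  int ite;
  for (ite = 0; ite < 4; ++ite) {
    const int id = ite & 1;            /* 0, 1, 0, 1: reference to refine */
    const int new_err = refine(id);    /* the other reference stays fixed */
    if (new_err < err[id])
      err[id] = new_err;
    else
      break;                           /* no improvement: stop iterating */
  }
}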
2733 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2734 BLOCK_SIZE_TYPE bsize,
2735 int64_t txfm_cache[],
2736 int *rate2, int64_t *distortion,
2738 int *rate_y, int64_t *distortion_y,
2739 int *rate_uv, int64_t *distortion_uv,
2740 int *mode_excluded, int *disable_skip,
2741 INTERPOLATIONFILTERTYPE *best_filter,
2742 int_mv (*mode_mv)[MAX_REF_FRAMES],
2743 int mi_row, int mi_col,
2744 int_mv single_newmv[MAX_REF_FRAMES],
2745 int64_t *psse, int64_t ref_best_rd) {
2746 VP9_COMMON *cm = &cpi->common;
2747 MACROBLOCKD *xd = &x->e_mbd;
2748 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2749 const int is_comp_pred = (mbmi->ref_frame[1] > 0);
2750 const int num_refs = is_comp_pred ? 2 : 1;
2751 const int this_mode = mbmi->mode;
2752 int_mv *frame_mv = mode_mv[this_mode];
2754 int refs[2] = { mbmi->ref_frame[0],
2755 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2757 int64_t this_rd = 0;
2758 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2759 int pred_exists = 0;
2760 int interpolating_intpel_seen = 0;
2762 int64_t rd, best_rd = INT64_MAX;
2763 int best_needs_copy = 0;
2764 uint8_t *orig_dst[MAX_MB_PLANE];
2765 int orig_dst_stride[MAX_MB_PLANE];
2768 if (this_mode == NEWMV) {
2771 // Initialize mv using single prediction mode result.
2772 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2773 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2775 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2776 joint_motion_search(cpi, x, bsize, frame_mv,
2777 mi_row, mi_col, single_newmv, &rate_mv);
2779 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2780 &mbmi->ref_mvs[refs[0]][0],
2781 x->nmvjointcost, x->mvcost, 96,
2782 x->e_mbd.allow_high_precision_mv);
2783 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2784 &mbmi->ref_mvs[refs[1]][0],
2785 x->nmvjointcost, x->mvcost, 96,
2786 x->e_mbd.allow_high_precision_mv);
2788 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2789 frame_mv[refs[1]].as_int == INVALID_MV)
2794 single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
2796 frame_mv[refs[0]].as_int =
2797 xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2798 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2802 // If we're in a near/nearest mode and the mv is (0,0), compare the cost to ZEROMV.
2803 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2804 frame_mv[refs[0]].as_int == 0 &&
2805 !vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2806 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
2807 int rfc = mbmi->mb_mode_context[mbmi->ref_frame[0]];
2808 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2809 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2810 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2812 if (this_mode == NEARMV) {
2815 } else if (this_mode == NEARESTMV) {
2819 assert(this_mode == ZEROMV);
2820 if (num_refs == 1) {
2822 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
2824 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
2828 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
2829 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
2831 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2832 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
2838 for (i = 0; i < num_refs; ++i) {
2839 cur_mv[i] = frame_mv[refs[i]];
2840 // Clip "next_nearest" so that it does not extend too far out of the image.
2841 if (this_mode == NEWMV)
2842 assert(!clamp_mv2(&cur_mv[i], xd));
2844 clamp_mv2(&cur_mv[i], xd);
2846 if (mv_check_bounds(x, &cur_mv[i]))
2848 mbmi->mv[i].as_int = cur_mv[i].as_int;
2851 // Do the first prediction into the destination buffer. Do the next
2852 // prediction into a temporary buffer. Then keep track of which one
2853 // of these currently holds the best predictor, and use the other
2854 // one for future predictions. In the end, copy from tmp_buf to
2855 // dst if necessary.
2856 for (i = 0; i < MAX_MB_PLANE; i++) {
2857 orig_dst[i] = xd->plane[i].dst.buf;
2858 orig_dst_stride[i] = xd->plane[i].dst.stride;
2861 /* We don't include the cost of the second reference here, because there
2862 * are only three options: Last/Golden, ARF/Last or Golden/ARF; in other
2863 * words, if you present them in that order, the second one is always known
2864 * once the first is known */
2865 *rate2 += cost_mv_ref(cpi, this_mode,
2866 mbmi->mb_mode_context[mbmi->ref_frame[0]]);
2868 if (!(*mode_excluded)) {
2870 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2872 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2877 interpolating_intpel_seen = 0;
2878 // Are all MVs integer pel for both Y and UV?
2879 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2880 (mbmi->mv[0].as_mv.col & 15) == 0;
2882 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2883 (mbmi->mv[1].as_mv.col & 15) == 0;
2884 // Search for the best switchable filter by checking the variance of
2885 // prediction error, irrespective of whether the filter will be used.
2886 *best_filter = EIGHTTAP;
2887 if (cpi->sf.use_8tap_always) {
2888 *best_filter = EIGHTTAP;
2889 vp9_zero(cpi->rd_filter_cache);
2892 int tmp_rate_sum = 0;
2893 int64_t tmp_dist_sum = 0;
2895 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
2896 for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
2899 const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
2900 const int is_intpel_interp = intpel_mv;
2901 mbmi->interp_filter = filter;
2902 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2903 rs = get_switchable_rate(cm, x);
2904 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2906 if (interpolating_intpel_seen && is_intpel_interp) {
2907 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2908 tmp_rate_sum, tmp_dist_sum);
2909 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
2910 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
2911 cpi->rd_filter_cache[i] + rs_rd);
2912 rd = cpi->rd_filter_cache[i];
2913 if (cm->mcomp_filter_type == SWITCHABLE)
2917 int64_t dist_sum = 0;
2918 if ((cm->mcomp_filter_type == SWITCHABLE &&
2919 (!i || best_needs_copy)) ||
2920 (cm->mcomp_filter_type != SWITCHABLE &&
2921 (cm->mcomp_filter_type == mbmi->interp_filter ||
2922 (!interpolating_intpel_seen && is_intpel_interp)))) {
2923 for (j = 0; j < MAX_MB_PLANE; j++) {
2924 xd->plane[j].dst.buf = orig_dst[j];
2925 xd->plane[j].dst.stride = orig_dst_stride[j];
2928 for (j = 0; j < MAX_MB_PLANE; j++) {
2929 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2930 xd->plane[j].dst.stride = 64;
2933 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2934 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2935 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2936 rate_sum, dist_sum);
2937 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
2938 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
2939 cpi->rd_filter_cache[i] + rs_rd);
2940 rd = cpi->rd_filter_cache[i];
2941 if (cm->mcomp_filter_type == SWITCHABLE)
2943 if (!interpolating_intpel_seen && is_intpel_interp) {
2944 tmp_rate_sum = rate_sum;
2945 tmp_dist_sum = dist_sum;
2948 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2949 if (rd / 2 > ref_best_rd) {
2950 for (i = 0; i < MAX_MB_PLANE; i++) {
2951 xd->plane[i].dst.buf = orig_dst[i];
2952 xd->plane[i].dst.stride = orig_dst_stride[i];
2957 newbest = i == 0 || rd < best_rd;
2961 *best_filter = mbmi->interp_filter;
2962 if (cm->mcomp_filter_type == SWITCHABLE && i &&
2963 !(interpolating_intpel_seen && is_intpel_interp))
2964 best_needs_copy = !best_needs_copy;
2967 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2968 (cm->mcomp_filter_type != SWITCHABLE &&
2969 cm->mcomp_filter_type == mbmi->interp_filter)) {
2972 interpolating_intpel_seen |= is_intpel_interp;
2975 for (i = 0; i < MAX_MB_PLANE; i++) {
2976 xd->plane[i].dst.buf = orig_dst[i];
2977 xd->plane[i].dst.stride = orig_dst_stride[i];
2980 // Set the appropriate filter
2981 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2982 cm->mcomp_filter_type : *best_filter;
2983 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2984 rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0);
2987 if (best_needs_copy) {
2988 // again temporarily set the buffers to local memory to prevent a memcpy
2989 for (i = 0; i < MAX_MB_PLANE; i++) {
2990 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2991 xd->plane[i].dst.stride = 64;
2995 // Handles the special case when a filter that is not in the
2996 // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
2997 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
3001 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
3004 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
3005 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
3006 // If the current pred_error modeled rd is substantially more than the
3007 // best so far, do not bother doing a full rd.
3008 if (rd / 2 > ref_best_rd) {
3009 for (i = 0; i < MAX_MB_PLANE; i++) {
3010 xd->plane[i].dst.buf = orig_dst[i];
3011 xd->plane[i].dst.stride = orig_dst_stride[i];
3017 if (cpi->common.mcomp_filter_type == SWITCHABLE)
3018 *rate2 += get_switchable_rate(cm, x);
3020 if (!is_comp_pred) {
3021 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
3023 else if (x->encode_breakout) {
3024 const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]);
3025 const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize,
3027 unsigned int var, sse;
3028 // Skipping threshold for ac.
3029 unsigned int thresh_ac;
3030 // The encode_breakout input
3031 unsigned int encode_breakout = x->encode_breakout << 4;
3033 // Calculate threshold according to dequant value.
3034 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
3036 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
3037 if (thresh_ac > 36000)
3040 // Use encode_breakout input if it is bigger than internal threshold.
3041 if (thresh_ac < encode_breakout)
3042 thresh_ac = encode_breakout;
3044 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
3045 xd->plane[0].dst.buf,
3046 xd->plane[0].dst.stride, &sse);
3048 // Adjust threshold according to partition size.
3049 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
3050 b_height_log2_lookup[bsize]);
3052 // Y skipping condition checking
3053 if (sse < thresh_ac || sse == 0) {
3054 // Skipping threshold for dc
3055 unsigned int thresh_dc;
3057 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
3059 // DC skipping condition checking
3060 if ((sse - var) < thresh_dc || sse == var) {
3061 unsigned int sse_u, sse_v;
3062 unsigned int var_u, var_v;
3064 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
3065 x->plane[1].src.stride,
3066 xd->plane[1].dst.buf,
3067 xd->plane[1].dst.stride, &sse_u);
3069 // U skipping condition checking
3070 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
3071 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
3072 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
3073 x->plane[2].src.stride,
3074 xd->plane[2].dst.buf,
3075 xd->plane[2].dst.stride, &sse_v);
3077 // V skipping condition checking
3078 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
3079 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
3085 // Scaling factor for SSE from spatial domain to frequency domain
3086 // is 16. Adjust distortion accordingly.
3087 *distortion_uv = (sse_u + sse_v) << 4;
3088 *distortion = (sse << 4) + *distortion_uv;
3091 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
3100 int skippable_y, skippable_uv;
3101 int64_t sseuv = INT_MAX;
3103 // Y cost and distortion
3104 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
3105 bsize, txfm_cache, ref_best_rd);
3107 if (*rate_y == INT_MAX) {
3109 *distortion = INT64_MAX;
3110 for (i = 0; i < MAX_MB_PLANE; i++) {
3111 xd->plane[i].dst.buf = orig_dst[i];
3112 xd->plane[i].dst.stride = orig_dst_stride[i];
3118 *distortion += *distortion_y;
3120 super_block_uvrd(cm, x, rate_uv, distortion_uv,
3121 &skippable_uv, &sseuv, bsize);
3125 *distortion += *distortion_uv;
3126 *skippable = skippable_y && skippable_uv;
3129 for (i = 0; i < MAX_MB_PLANE; i++) {
3130 xd->plane[i].dst.buf = orig_dst[i];
3131 xd->plane[i].dst.stride = orig_dst_stride[i];
3134 return this_rd; // if 0, this will be re-calculated by caller
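/*
 * Illustrative aside: the encode_breakout test above derives an ac skip
 * threshold from the ac dequantizer, caps it, lets a larger user-supplied
 * encode_breakout override it, and scales it with the partition size before
 * comparing against the prediction SSE.  A hypothetical stand-alone form of
 * that threshold derivation (the cap value is assumed to match the limit
 * tested above):
 */
static unsigned int example_ac_skip_threshold(int ac_dequant,
                                              unsigned int user_breakout,
                                              int b_width_log2_val,
                                              int b_height_log2_val) {
  unsigned int thresh = (unsigned int)(ac_dequant * ac_dequant) / 9;
  if (thresh > 36000)
    thresh = 36000;                 /* assumed cap to limit PSNR loss */
  if (thresh < user_breakout)
    thresh = user_breakout;         /* honour a larger user setting */
  return thresh >> (8 - (b_width_log2_val + b_height_log2_val));
}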
3137 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3138 int *returnrate, int64_t *returndist,
3139 BLOCK_SIZE_TYPE bsize,
3140 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3141 VP9_COMMON *const cm = &cpi->common;
3142 MACROBLOCKD *const xd = &x->e_mbd;
3143 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3144 int y_skip = 0, uv_skip;
3145 int64_t dist_y = 0, dist_uv = 0, txfm_cache[TX_MODES];
3148 vpx_memset(&txfm_cache, 0, sizeof(txfm_cache));
3150 xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
3151 if (bsize >= BLOCK_SIZE_SB8X8) {
3152 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3153 &dist_y, &y_skip, bsize, txfm_cache,
3154 best_rd) >= best_rd) {
3155 *returnrate = INT_MAX;
3158 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3159 &dist_uv, &uv_skip, bsize);
3162 if (rd_pick_intra4x4mby_modes(cpi, x, &rate_y, &rate_y_tokenonly,
3163 &dist_y, best_rd) >= best_rd) {
3164 *returnrate = INT_MAX;
3167 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3168 &dist_uv, &uv_skip, BLOCK_SIZE_SB8X8);
3171 if (y_skip && uv_skip) {
3172 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3173 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
3174 *returndist = dist_y + (dist_uv >> 2);
3175 memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
3178 *returnrate = rate_y + rate_uv +
3179 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
3180 *returndist = dist_y + (dist_uv >> 2);
3181 if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
3182 for (i = 0; i < TX_MODES; i++) {
3183 ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->tx_mode];
3188 ctx->mic = *xd->mode_info_context;
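/*
 * Illustrative aside: the rate bookkeeping above removes the token
 * (coefficient) rate and charges only the "skip" flag when both planes are
 * fully skippable, and otherwise charges the "no skip" flag on top of the
 * full rate.  A stand-alone sketch of that accounting (hypothetical names):
 */
static int example_intra_rate(int rate_y, int rate_uv,
                              int rate_y_tokenonly, int rate_uv_tokenonly,
                              int both_skippable,
                              int skip_flag_cost, int no_skip_flag_cost) {
  if (both_skippable)
    return rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
           skip_flag_cost;
  return rate_y + rate_uv + no_skip_flag_cost;
}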
3191 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3192 int mi_row, int mi_col,
3194 int64_t *returndistortion,
3195 BLOCK_SIZE_TYPE bsize,
3196 PICK_MODE_CONTEXT *ctx,
3197 int64_t best_rd_so_far) {
3198 VP9_COMMON *cm = &cpi->common;
3199 MACROBLOCKD *xd = &x->e_mbd;
3200 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
3201 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3202 MB_PREDICTION_MODE this_mode;
3203 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3204 unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
3206 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3207 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3208 int_mv single_newmv[MAX_REF_FRAMES];
3209 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3211 int idx_list[4] = {0,
3215 int64_t best_rd = best_rd_so_far;
3216 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3217 int64_t best_txfm_rd[TX_MODES];
3218 int64_t best_txfm_diff[TX_MODES];
3219 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3220 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3221 int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1];
3222 int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
3223 MB_MODE_INFO best_mbmode;
3225 int mode_index, best_mode_index = 0;
3226 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3227 vp9_prob comp_mode_p;
3228 int64_t best_intra_rd = INT64_MAX;
3229 int64_t best_inter_rd = INT64_MAX;
3230 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3231 // MB_PREDICTION_MODE best_inter_mode = ZEROMV;
3232 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3233 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
3234 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3235 int64_t dist_uv[TX_SIZES];
3236 int skip_uv[TX_SIZES];
3237 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3238 struct scale_factors scale_factor[4];
3239 unsigned int ref_frame_mask = 0;
3240 unsigned int mode_mask = 0;
3241 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3242 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
3243 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
3244 cpi->common.y_dc_delta_q);
3245 int_mv seg_mvs[4][MAX_REF_FRAMES];
3246 union b_mode_info best_bmodes[4];
3247 PARTITION_INFO best_partition;
3248 int bwsl = b_width_log2(bsize);
3249 int bws = (1 << bwsl) / 4; // mode_info step for subsize
3250 int bhsl = b_height_log2(bsize);
3251 int bhs = (1 << bhsl) / 4; // mode_info step for subsize
3254 x->skip_encode = (cpi->sf.skip_encode_frame &&
3255 xd->q_index < QIDX_SKIP_THRESH);
3257 for (i = 0; i < 4; i++) {
3259 for (j = 0; j < MAX_REF_FRAMES; j++)
3260 seg_mvs[i][j].as_int = INVALID_MV;
3262 // Wherever the flag is set, the error is much higher than that of its neighbors.
3263 ctx->frames_with_high_error = 0;
3264 ctx->modes_with_high_error = 0;
3266 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3268 vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
3269 vpx_memset(&single_newmv, 0, sizeof(single_newmv));
3271 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3272 best_pred_rd[i] = INT64_MAX;
3273 for (i = 0; i < TX_MODES; i++)
3274 best_txfm_rd[i] = INT64_MAX;
3275 for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
3276 best_filter_rd[i] = INT64_MAX;
3277 for (i = 0; i < TX_SIZES; i++)
3278 rate_uv_intra[i] = INT_MAX;
3280 *returnrate = INT_MAX;
3282 // Create a mask set to 1 for each reference frame used by a smaller
3284 if (cpi->sf.use_avoid_tested_higherror) {
3285 switch (block_size) {
3287 for (i = 0; i < 4; i++) {
3288 for (j = 0; j < 4; j++) {
3289 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
3290 mode_mask |= x->mb_context[i][j].modes_with_high_error;
3293 for (i = 0; i < 4; i++) {
3294 ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
3295 mode_mask |= x->sb32_context[i].modes_with_high_error;
3299 for (i = 0; i < 4; i++) {
3301 x->mb_context[xd->sb_index][i].frames_with_high_error;
3302 mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
3306 // Until we handle all block sizes, treat everything as present.
3311 ref_frame_mask = ~ref_frame_mask;
3312 mode_mask = ~mode_mask;
3315 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3316 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3317 setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
3318 mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
3319 yv12_mb, scale_factor);
3321 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3322 frame_mv[ZEROMV][ref_frame].as_int = 0;
3325 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3326 int mode_excluded = 0;
3327 int64_t this_rd = INT64_MAX;
3328 int disable_skip = 0;
3329 int compmode_cost = 0;
3330 int rate2 = 0, rate_y = 0, rate_uv = 0;
3331 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3333 int64_t txfm_cache[TX_MODES];
3336 int64_t total_sse = INT_MAX;
3339 for (i = 0; i < TX_MODES; ++i)
3340 txfm_cache[i] = INT64_MAX;
3343 this_mode = vp9_mode_order[mode_index].mode;
3344 ref_frame = vp9_mode_order[mode_index].ref_frame;
3345 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
3347 // Skip modes that have been masked off, but always consider the first mode.
3348 if (mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) &&
3349 (cpi->unused_mode_skip_mask & (1 << mode_index)) )
3352 // Skip if the current reference frame has been masked off
3353 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3354 (cpi->ref_frame_mask & (1 << ref_frame)))
3357 // Test best rd so far against threshold for trying this mode.
3358 if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
3359 cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) ||
3360 cpi->rd_threshes[bsize][mode_index] == INT_MAX)
3363 // Do not allow compound prediction if the segment level reference
3364 // frame feature is in use, since in that case there can only be one reference.
3365 if ((second_ref_frame > INTRA_FRAME) &&
3366 vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME))
3369 // Skip some checking based on small partitions' result.
3370 if (x->fast_ms > 1 && !ref_frame)
3372 if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
3375 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) {
3376 if (!(ref_frame_mask & (1 << ref_frame))) {
3379 if (!(mode_mask & (1 << this_mode))) {
3382 if (second_ref_frame != NONE
3383 && !(ref_frame_mask & (1 << second_ref_frame))) {
3388 mbmi->ref_frame[0] = ref_frame;
3389 mbmi->ref_frame[1] = second_ref_frame;
3391 if (!(ref_frame == INTRA_FRAME
3392 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3395 if (!(second_ref_frame == NONE
3396 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3400 comp_pred = second_ref_frame > INTRA_FRAME;
3402 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3403 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3405 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3406 if (ref_frame != best_inter_ref_frame &&
3407 second_ref_frame != best_inter_ref_frame)
3410 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3412 if (ref_frame > 0 &&
3413 (scale_factor[ref_frame].x_scale_fp != VP9_REF_NO_SCALE ||
3414 scale_factor[ref_frame].y_scale_fp != VP9_REF_NO_SCALE) &&
3415 this_mode == SPLITMV)
3418 if (second_ref_frame > 0 &&
3419 (scale_factor[second_ref_frame].x_scale_fp != VP9_REF_NO_SCALE ||
3420 scale_factor[second_ref_frame].y_scale_fp != VP9_REF_NO_SCALE) &&
3421 this_mode == SPLITMV)
3424 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3425 mbmi->mode = this_mode;
3426 mbmi->uv_mode = DC_PRED;
3428 // Evaluate all sub-pel filters irrespective of whether we can use
3429 // them for this frame.
3430 mbmi->interp_filter = cm->mcomp_filter_type;
3431 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3433 if (bsize >= BLOCK_SIZE_SB8X8 &&
3434 (this_mode == I4X4_PRED || this_mode == SPLITMV))
3436 if (bsize < BLOCK_SIZE_SB8X8 &&
3437 !(this_mode == I4X4_PRED || this_mode == SPLITMV))
3441 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3443 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3445 mode_excluded = mode_excluded
3447 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
3449 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3452 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
3456 // Select predictors
3457 for (i = 0; i < MAX_MB_PLANE; i++) {
3458 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3460 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3463 // If the segment reference frame feature is enabled,
3464 // then do nothing if the current ref frame is not allowed.
3465 if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME) &&
3466 vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) !=
3469 // If the segment skip feature is enabled,
3470 // then do nothing if the current mode is not allowed.
3471 } else if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP) &&
3472 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3474 // Disable this drop out case if the ref frame
3475 // segment level feature is enabled for this segment. This is to
3476 // prevent the possibility that we end up unable to pick any mode.
3477 } else if (!vp9_segfeature_active(&xd->seg, segment_id,
3478 SEG_LVL_REF_FRAME)) {
3479 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3480 // unless ARNR filtering is enabled in which case we want
3481 // an unfiltered alternative. We allow near/nearest as well
3482 // because they may result in zero-zero MVs but be cheaper.
3483 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3484 if ((this_mode != ZEROMV &&
3485 !(this_mode == NEARMV &&
3486 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3487 !(this_mode == NEARESTMV &&
3488 frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
3489 ref_frame != ALTREF_FRAME) {
3494 // TODO(JBB): This is to make up for the fact that we don't have sad
3495 // functions that work when the block size reads outside the umv.  We
3496 // should fix this either by making the motion search just work on
3497 // a representative block on the boundary (first), and then implement a
3498 // function that does sads when inside the border.
3499 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
3500 this_mode == NEWMV) {
3504 if (this_mode == I4X4_PRED) {
3508 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3509 (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME))
3513 mbmi->txfm_size = TX_4X4;
3514 if (rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
3515 &distortion_y, best_rd) >= best_rd)
3518 rate2 += intra_cost_penalty;
3519 distortion2 += distortion_y;
3521 if (rate_uv_intra[TX_4X4] == INT_MAX) {
3522 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
3523 &rate_uv_tokenonly[TX_4X4],
3524 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
3527 rate2 += rate_uv_intra[TX_4X4];
3528 rate_uv = rate_uv_tokenonly[TX_4X4];
3529 distortion2 += dist_uv[TX_4X4];
3530 distortion_uv = dist_uv[TX_4X4];
3531 mbmi->uv_mode = mode_uv[TX_4X4];
3532 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3533 for (i = 0; i < TX_MODES; ++i)
3534 txfm_cache[i] = txfm_cache[ONLY_4X4];
3535 } else if (ref_frame == INTRA_FRAME) {
3537 // Only search the oblique modes if the best so far is
3538 // one of the neighboring directional modes
3539 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3540 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3541 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
3544 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3545 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3548 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3549 bsize, txfm_cache, best_rd);
3551 if (rate_y == INT_MAX)
3554 uv_tx = MIN(mbmi->txfm_size, max_uv_txsize_lookup[bsize]);
3555 if (rate_uv_intra[uv_tx] == INT_MAX) {
3556 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
3557 &rate_uv_tokenonly[uv_tx],
3558 &dist_uv[uv_tx], &skip_uv[uv_tx],
3562 rate_uv = rate_uv_tokenonly[uv_tx];
3563 distortion_uv = dist_uv[uv_tx];
3564 skippable = skippable && skip_uv[uv_tx];
3565 mbmi->uv_mode = mode_uv[uv_tx];
3567 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3568 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
3569 rate2 += intra_cost_penalty;
3570 distortion2 = distortion_y + distortion_uv;
3571 } else if (this_mode == SPLITMV) {
3572 const int is_comp_pred = second_ref_frame > 0;
3575 int64_t this_rd_thresh;
3576 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3577 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3578 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3579 int tmp_best_skippable = 0;
3580 int switchable_filter_index;
3581 int_mv *second_ref = is_comp_pred ?
3582 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3583 union b_mode_info tmp_best_bmodes[16];
3584 MB_MODE_INFO tmp_best_mbmode;
3585 PARTITION_INFO tmp_best_partition;
3586 BEST_SEG_INFO bsi[VP9_SWITCHABLE_FILTERS];
3587 int pred_exists = 0;
3590 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3591 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3593 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3594 if (ref_frame != best_inter_ref_frame &&
3595 second_ref_frame != best_inter_ref_frame)
3599 this_rd_thresh = (ref_frame == LAST_FRAME) ?
3600 cpi->rd_threshes[bsize][THR_NEWMV] :
3601 cpi->rd_threshes[bsize][THR_NEWA];
3602 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3603 cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
3604 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
3606 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
3607 for (switchable_filter_index = 0;
3608 switchable_filter_index < VP9_SWITCHABLE_FILTERS;
3609 ++switchable_filter_index) {
3612 mbmi->interp_filter =
3613 vp9_switchable_interp[switchable_filter_index];
3614 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3616 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
3617 &mbmi->ref_mvs[ref_frame][0],
3620 &rate, &rate_y, &distortion,
3621 &skippable, &total_sse,
3622 (int)this_rd_thresh, seg_mvs,
3623 bsi, switchable_filter_index,
3626 if (tmp_rd == INT64_MAX)
        cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
        rs = get_switchable_rate(cm, x);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
        cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
            MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
        if (cm->mcomp_filter_type == SWITCHABLE)
          tmp_rd += rs_rd;

        newbest = (tmp_rd < tmp_best_rd);
        if (newbest) {
          tmp_best_filter = mbmi->interp_filter;
          tmp_best_rd = tmp_rd;
        }
        if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
            (mbmi->interp_filter == cm->mcomp_filter_type &&
             cm->mcomp_filter_type != SWITCHABLE)) {
          tmp_best_rdu = tmp_rd;
          tmp_best_rate = rate;
          tmp_best_ratey = rate_y;
          tmp_best_distortion = distortion;
          tmp_best_sse = total_sse;
          tmp_best_skippable = skippable;
          tmp_best_mbmode = *mbmi;
          tmp_best_partition = *x->partition_info;
          for (i = 0; i < 4; i++)
            tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
          pred_exists = 1;
          if (switchable_filter_index == 0 &&
              cpi->sf.use_rd_breakout &&
              best_rd < INT64_MAX) {
            if (tmp_best_rdu / 2 > best_rd) {
              // Skip searching the other filters if the first one is
              // already substantially larger than the best so far.
              tmp_best_filter = mbmi->interp_filter;
              tmp_best_rdu = INT64_MAX;
              break;
            }
          }
        }
      }  // switchable_filter_index loop

      if (tmp_best_rdu == INT64_MAX)
        continue;

      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
                             tmp_best_filter : cm->mcomp_filter_type);
      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (bilinear, 6-tap) is indicated at the frame level
        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
                                             &mbmi->ref_mvs[ref_frame][0],
                                             second_ref, best_yrd,
                                             &rate, &rate_y, &distortion,
                                             &skippable, &total_sse,
                                             (int)this_rd_thresh, seg_mvs,
                                             bsi, 0,
                                             mi_row, mi_col);
        if (tmp_rd == INT64_MAX)
          continue;
      } else {
        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
          int rs = get_switchable_rate(cm, x);
          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
        }
        tmp_rd = tmp_best_rdu;
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mbmi = tmp_best_mbmode;
        *x->partition_info = tmp_best_partition;
        for (i = 0; i < 4; i++)
          xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
      }

      rate2 += rate;
      distortion2 += distortion;

      if (cpi->common.mcomp_filter_type == SWITCHABLE)
        rate2 += get_switchable_rate(cm, x);

      if (!mode_excluded) {
        if (is_comp_pred)
          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
        else
          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
      }
      compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);

      if (RDCOST(x->rdmult, x->rddiv, rate2, distortion2) < best_rd) {
        // If even the 'Y' rd value of split is higher than best so far
        // then don't bother looking at UV
        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                        BLOCK_SIZE_SB8X8);
        vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
        super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
                                  &uv_skippable, &uv_sse,
                                  BLOCK_SIZE_SB8X8, TX_4X4);
        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;
      }

      txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      for (i = 0; i < TX_MODES; ++i)
        txfm_cache[i] = txfm_cache[ONLY_4X4];
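
      // Note on the assignment above: split (sub-8x8) modes are coded with
      // 4x4 transforms only, so every tx-mode entry of txfm_cache receives
      // the same RD score, and the transform-mode bookkeeping further down
      // the loop treats this mode neutrally across transform modes.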
    } else {
      compmode_cost = vp9_cost_bit(comp_mode_p,
                                   second_ref_frame > INTRA_FRAME);
      this_rd = handle_inter_mode(cpi, x, bsize,
                                  &rate2, &distortion2, &skippable,
                                  &rate_y, &distortion_y,
                                  &rate_uv, &distortion_uv,
                                  &mode_excluded, &disable_skip,
                                  &tmp_best_filter, frame_mv,
                                  single_newmv, &total_sse, best_rd);
      if (this_rd == INT64_MAX)
        continue;
    }

    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
      rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      // Test for the condition where skip block will be activated
      // because there are no non-zero coefficients and make any
      // necessary adjustment for rate. Ignore if skip is coded at
      // segment level as the cost won't have been added in.
      // Is Mb level skip allowed (i.e. not coded at segment level).
      const int mb_skip_allowed = !vp9_segfeature_active(&xd->seg, segment_id,
                                                         SEG_LVL_SKIP);

      if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
        // Back out the coefficient coding costs
        rate2 -= (rate_y + rate_uv);
        // for best yrd calculation
        rate_uv = 0;

        if (mb_skip_allowed) {
          int prob_skip_cost;

          // Cost the skip mb case
          vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);

          prob_skip_cost = vp9_cost_bit(skip_prob, 1);
          rate2 += prob_skip_cost;
        }
      } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no skip flag.
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            0);
          rate2 += prob_skip_cost;
        } else {
          // FIXME(rbultje) make this work for splitmv also
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            1);
          rate2 += prob_skip_cost;
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          this_skip2 = 1;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no skip flag.
        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                          0);
        rate2 += prob_skip_cost;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }
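
    // RDCOST() folds the accumulated rate and distortion into one score
    // using the lambda pair (x->rdmult, x->rddiv); roughly rate * rdmult
    // scaled down by 256 plus a distortion term weighted by rddiv (see the
    // RDCOST macro for the exact form). The comparisons above use the same
    // metric to decide whether coding the residual or signalling a skip is
    // cheaper for this mode.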

    // Keep record of best intra rd
    if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME &&
        is_intra_mode(xd->mode_info_context->mbmi.mode) &&
        this_rd < best_intra_rd) {
      best_intra_rd = this_rd;
      best_intra_mode = xd->mode_info_context->mbmi.mode;
    }
    // Keep record of best inter rd with single reference
    if (xd->mode_info_context->mbmi.ref_frame[0] > INTRA_FRAME &&
        xd->mode_info_context->mbmi.ref_frame[1] == NONE &&
        this_rd < best_inter_rd) {
      best_inter_rd = this_rd;
      best_inter_ref_frame = ref_frame;
      // best_inter_mode = xd->mode_info_context->mbmi.mode;
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }

    if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
      // Store the respective mode distortions for later use.
      if (mode_distortions[this_mode] == -1
          || distortion2 < mode_distortions[this_mode]) {
        mode_distortions[this_mode] = distortion2;
      }
      if (frame_distortions[ref_frame] == -1
          || distortion2 < frame_distortions[ref_frame]) {
        frame_distortions[ref_frame] = distortion2;
      }
    }

    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        // Note index of best mode so far
        const int qstep = xd->plane[0].dequant[1];

        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_yrd = best_rd -
                   RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        best_partition = *x->partition_info;

        if (this_mode == I4X4_PRED || this_mode == SPLITMV)
          for (i = 0; i < 4; i++)
            best_bmodes[i] = xd->mode_info_context->bmi[i];

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history
        if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE)
          if (ref_frame > INTRA_FRAME && distortion2 * 4 < qstep * qstep)
            x->skip = 1;
      }

      // Testing this mode gave rise to an improvement in best error score.
      // Lower the threshold a bit for next time.
      cpi->rd_thresh_mult[mode_index] =
          (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
          cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
      cpi->rd_threshes[mode_index] =
          (cpi->rd_baseline_thresh[mode_index] >> 7)
          * cpi->rd_thresh_mult[mode_index];
    } else {
      // If the mode did not help improve the best error case then
      // raise the threshold for testing that mode next time around.
      cpi->rd_thresh_mult[mode_index] += 4;

      if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
        cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

      cpi->rd_threshes[mode_index] =
          (cpi->rd_baseline_thresh[mode_index] >> 7)
          * cpi->rd_thresh_mult[mode_index];
    }
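
    // The per-mode activation threshold is the baseline value, scaled down
    // by 128, times a multiplier that adapts as the search runs: picking a
    // mode as the new best nudges its multiplier down by 2 (towards
    // MIN_THRESHMULT), while losing raises it by 4 (capped at
    // MAX_THRESHMULT). For example, a mode with multiplier 128 that keeps
    // losing drifts to 132, 136, ... so it has to clear a progressively
    // larger threshold before being searched again (an illustrative reading
    // of the update above, not an exact model of the search behaviour).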

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (second_ref_frame <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
      } else if (second_ref_frame > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
    }

    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->mcomp_filter_type != BILINEAR) {
      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
                    VP9_SWITCHABLE_FILTERS :
                    vp9_switchable_interp_map[cm->mcomp_filter_type]];
      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
        int64_t adj_rd;
        // In cases of poor prediction, filter_cache[] can contain really big
        // values, which actually are bigger than this_rd itself. This can
        // cause negative best_filter_rd[] values, which is obviously silly.
        // Therefore, if filter_cache < ref, we do an adjusted calculation.
        if (cpi->rd_filter_cache[i] >= ref)
          adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
        else  // FIXME(rbultje) do this for comppred also
          adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
      }
    }
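
    // The adjustment above rebases each cached filter RD onto this mode's
    // final RD: when the cached value is no better than the reference
    // filter's, the difference is simply added on; when it is better, the RD
    // is scaled down proportionally (this_rd * filter_cache[i] / ref), which
    // can never go negative. For example, with this_rd = 1000, ref = 400 and
    // filter_cache[i] = 300, the scaled form gives 1000 - 100 * 1000 / 400 =
    // 750 rather than 900 from the additive form (illustrative numbers, not
    // taken from a real encode).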

    /* keep record of best txfm size */
    if (bsize < BLOCK_SIZE_SB32X32) {
      if (bsize < BLOCK_SIZE_MB16X16) {
        if (this_mode == SPLITMV || this_mode == I4X4_PRED)
          txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
        txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
      }
      txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES; i++) {
        int64_t adj_rd = INT64_MAX;
        if (this_mode != I4X4_PRED) {
          adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->tx_mode];
        } else {
          adj_rd = this_rd;
        }

        if (adj_rd < best_txfm_rd[i])
          best_txfm_rd[i] = adj_rd;
      }
    }

    if (x->skip && !comp_pred)
      break;
  }

  if (best_rd >= best_rd_so_far)
    return INT64_MAX;

  // If we used an estimate for the uv intra rd in the loop above...
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Do Intra UV best rd mode selection if best mode choice above was intra.
    if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8
                                                         : bsize);
    }
  }

  // If indicated, mark the index of the chosen mode to be inspected at
  // other block sizes.
  if (bsize <= cpi->sf.unused_mode_skip_lvl) {
    cpi->unused_mode_skip_mask = cpi->unused_mode_skip_mask &
                                 (~((int64_t)1 << best_mode_index));
  }
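
  // unused_mode_skip_mask is a per-mode bitmask of candidates that may be
  // skipped when other partition sizes are searched; clearing the winning
  // mode's bit here keeps it eligible everywhere, on the reasoning that a
  // mode which won at this size is a worthwhile candidate elsewhere too
  // (a reading of the mask update above).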

  // If we are using reference masking and the set mask flag is set then
  // create the reference frame mask.
  if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
    cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);
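
  // ~(1 << ref_frame) leaves every bit set except the one for the winning
  // mode's reference frame, so later masked searches can concentrate on that
  // reference. The two loops below play a similar pruning role: modes and
  // reference frames whose best distortion came in at more than twice the
  // distortion of the chosen mode are flagged in the coding context (a
  // reading of how the mask and flags are written here; how they are
  // consumed lives elsewhere in the encoder).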

  // Flag all modes that have a distortion that's > 2x the best we found at
  // this level.
  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
      continue;

    if (mode_distortions[mode_index] > 2 * *returndistortion) {
      ctx->modes_with_high_error |= (1 << mode_index);
    }
  }

  // Flag all ref frames that have a distortion that's > 2x the best we found
  // at this level.
  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
      ctx->frames_with_high_error |= (1 << ref_frame);
    }
  }

  if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
    *returnrate = INT_MAX;
    *returndistortion = INT_MAX;
    return best_rd;
  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.ref_frame[0] == INTRA_FRAME));

  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the
  // mode/size combination that wins out.
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
      if (mode_index == best_mode_index) {
        cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT;
      } else {
        cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC;
        if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
            (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) {
          cpi->rd_thresh_freq_fact[bsize][mode_index] =
              cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT;
        }
      }
    }
  }
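
  // Each time a block of this size is searched, the winning mode has its
  // frequency factor reset to the base value while every other mode's factor
  // creeps up towards the cap set by adaptive_rd_thresh; modes that rarely
  // win therefore face steadily higher effective RD thresholds until they
  // win again (a description of the update loop above).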

  // TODO(rbultje) integrate this with the rd_thresh_freq_fact RD
  // thresholding above.
  // Reduce the activation RD thresholds for the best choice mode
  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);

    cpi->rd_thresh_mult[best_mode_index] =
        (cpi->rd_thresh_mult[best_mode_index] >=
            (MIN_THRESHMULT + best_adjustment)) ?
        cpi->rd_thresh_mult[best_mode_index] - best_adjustment :
        MIN_THRESHMULT;
    cpi->rd_threshes[best_mode_index] =
        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
        cpi->rd_thresh_mult[best_mode_index];
  }

  *mbmi = best_mbmode;
  x->skip |= best_skip2;
  if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
    for (i = 0; i < 4; i++)
      xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
  }

  if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
    for (i = 0; i < 4; i++)
      xd->mode_info_context->bmi[i].as_mv[0].as_int =
          best_bmodes[i].as_mv[0].as_int;

    if (mbmi->ref_frame[1] > 0)
      for (i = 0; i < 4; i++)
        xd->mode_info_context->bmi[i].as_mv[1].as_int =
            best_bmodes[i].as_mv[1].as_int;

    *x->partition_info = best_partition;

    mbmi->mv[0].as_int = xd->mode_info_context->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = xd->mode_info_context->bmi[3].as_mv[1].as_int;
  }

  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->mcomp_filter_type == SWITCHABLE)
      assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0);
  } else {
    vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
  }

  if (!x->skip) {
    for (i = 0; i < TX_MODES; i++) {
      if (best_txfm_rd[i] == INT64_MAX)
        best_txfm_diff[i] = 0;
      else
        best_txfm_diff[i] = best_rd - best_txfm_rd[i];
    }
  } else {
    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
  }
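
  // best_pred_diff, best_filter_diff and best_txfm_diff record the signed RD
  // gap between the overall winner and the best RD achievable under each
  // prediction type, interpolation filter and transform mode (with sentinel
  // values when no candidate was usable, or zeros when the block is being
  // skipped). They are handed to store_coding_context() below (a summary of
  // the bookkeeping above, not of how callers consume it).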

  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                    scale_factor);
  store_coding_context(x, ctx, best_mode_index,
                       &best_partition,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_txfm_diff, best_filter_diff);