/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <assert.h>
#include <limits.h>
#include <math.h>

#include "./vp9_rtcd.h"
18 #include "vpx_mem/vpx_mem.h"
20 #include "vp9/common/vp9_common.h"
21 #include "vp9/common/vp9_entropy.h"
22 #include "vp9/common/vp9_entropymode.h"
23 #include "vp9/common/vp9_idct.h"
24 #include "vp9/common/vp9_mvref_common.h"
25 #include "vp9/common/vp9_pragmas.h"
26 #include "vp9/common/vp9_pred_common.h"
27 #include "vp9/common/vp9_quant_common.h"
28 #include "vp9/common/vp9_reconinter.h"
29 #include "vp9/common/vp9_reconintra.h"
30 #include "vp9/common/vp9_seg_common.h"
31 #include "vp9/common/vp9_systemdependent.h"
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rdopt.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 #include "vp9/encoder/vp9_variance.h"
44 #define RD_THRESH_MAX_FACT 64
45 #define RD_THRESH_INC 1
46 #define RD_THRESH_POW 1.25
47 #define RD_MULT_EPB_RATIO 64
49 /* Factor to weigh the rate for switchable interp filters */
50 #define SWITCHABLE_INTERP_RATE_FACTOR 1
52 #define LAST_FRAME_MODE_MASK 0xFFEDCD60
53 #define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
54 #define ALT_REF_MODE_MASK 0xFFC648D0
56 #define MIN_EARLY_TERM_INDEX 3
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int rate, skip, use_fast_coef_costing, this_rate;
  int64_t dist, sse, this_dist, this_sse, this_rd, best_rd;
  const scan_order *so;
};
84 static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
85 {NEARESTMV, {LAST_FRAME, NONE}},
86 {NEARESTMV, {ALTREF_FRAME, NONE}},
87 {NEARESTMV, {GOLDEN_FRAME, NONE}},
89 {DC_PRED, {INTRA_FRAME, NONE}},
91 {NEWMV, {LAST_FRAME, NONE}},
92 {NEWMV, {ALTREF_FRAME, NONE}},
93 {NEWMV, {GOLDEN_FRAME, NONE}},
95 {NEARMV, {LAST_FRAME, NONE}},
96 {NEARMV, {ALTREF_FRAME, NONE}},
97 {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
98 {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
100 {TM_PRED, {INTRA_FRAME, NONE}},
102 {NEARMV, {LAST_FRAME, ALTREF_FRAME}},
103 {NEWMV, {LAST_FRAME, ALTREF_FRAME}},
104 {NEARMV, {GOLDEN_FRAME, NONE}},
105 {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
106 {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
108 {ZEROMV, {LAST_FRAME, NONE}},
109 {ZEROMV, {GOLDEN_FRAME, NONE}},
110 {ZEROMV, {ALTREF_FRAME, NONE}},
111 {ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
112 {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
114 {H_PRED, {INTRA_FRAME, NONE}},
115 {V_PRED, {INTRA_FRAME, NONE}},
116 {D135_PRED, {INTRA_FRAME, NONE}},
117 {D207_PRED, {INTRA_FRAME, NONE}},
118 {D153_PRED, {INTRA_FRAME, NONE}},
119 {D63_PRED, {INTRA_FRAME, NONE}},
120 {D117_PRED, {INTRA_FRAME, NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};
124 static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
125 {{LAST_FRAME, NONE}},
126 {{GOLDEN_FRAME, NONE}},
127 {{ALTREF_FRAME, NONE}},
128 {{LAST_FRAME, ALTREF_FRAME}},
129 {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};
133 // The baseline rd thresholds for breaking out of the rd loop for
134 // certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
136 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
137 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
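// Worked example of the factor table above (illustrative arithmetic only):
// with the << 2 convention, BLOCK_8X8 has factor 4 (x1.0, the baseline),
// BLOCK_4X4 has factor 2 (x0.5) and BLOCK_64X64 has factor 32 (x8.0), so the
// per-mode threshold applied in set_block_thresholds() scales roughly with
// the linear dimension of the block.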
141 static int raster_block_offset(BLOCK_SIZE plane_bsize,
142 int raster_block, int stride) {
143 const int bw = b_width_log2(plane_bsize);
144 const int y = 4 * (raster_block >> bw);
145 const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}

static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = &cpi->common.fc;
  int i, j;
158 for (i = 0; i < INTRA_MODES; i++)
159 for (j = 0; j < INTRA_MODES; j++)
160 vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
161 vp9_intra_mode_tree);
163 // TODO(rbultje) separate tables for superblock costing?
164 vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
165 vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
166 vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
167 vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
168 fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
170 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
171 vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
}
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
195 static const uint8_t rd_iifactor[32] = {
196 4, 4, 3, 2, 1, 0, 0, 0,
197 0, 0, 0, 0, 0, 0, 0, 0,
198 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
202 // 3* dc_qlookup[Q]*dc_qlookup[Q];
204 /* values are now correlated to quantizer */
205 static int sad_per_bit16lut[QINDEX_RANGE];
206 static int sad_per_bit4lut[QINDEX_RANGE];
void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
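// Example of the lut formula above (illustrative arithmetic only): for a
// quantizer value q of 40, sad_per_bit16 = (int)(0.0418 * 40 + 2.4107) = 4,
// i.e. the SAD-per-bit trade-off used in motion search grows slowly and
// roughly linearly with the quantizer.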
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below.
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}
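// Worked example (illustration only): if vp9_dc_quant() returns 64, the base
// multiplier is 88 * 64 * 64 / 25 = 14417, before the two-pass iiratio
// adjustment above. A larger RDMULT biases the rate-distortion trade-off
// towards spending fewer bits at the cost of more distortion.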
static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below.
  const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return MAX(q, 8);
}
240 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
241 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
                                            cm->base_qindex) + cm->y_dc_delta_q,
                             0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult[i] < thresh_max ? rd->thresh_mult[i] * t / 4
                                              : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vp9_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
  x->errorperbit += (x->errorperbit == 0);

  x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cm, rd);

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc.coef_probs);

    for (i = 0; i < PARTITION_CONTEXTS; i++)
      vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
                      vp9_partition_tree);
  }

  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(x->nmvjointcost,
                               cm->allow_high_precision_mv ? x->nmvcost_hp
                                                           : x->nmvcost,
                               &cm->fc.nmvc, cm->allow_high_precision_mv);

      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
        vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                        cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
    }
  }
}
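// Note on how these constants are consumed (explanatory aside, not new
// behaviour): the mode-decision loops below combine rate and distortion via
// RDCOST(x->rdmult, x->rddiv, rate, dist), so RDMULT acts roughly as the
// Lagrange multiplier lambda and RDDIV sets the fixed-point scale of the
// distortion term. errorperbit (RDMULT / RD_MULT_EPB_RATIO) expresses the
// same trade-off per bit and is used by the sub-pel motion search.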
321 static const int MAX_XSQ_Q10 = 245727;
323 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
324 // NOTE: The tables below must be of the same size
326 // The functions described below are sampled at the four most significant
327 // bits of x^2 + 8 / 256
330 // This table models the rate for a Laplacian source
331 // source with given variance when quantized with a uniform quantizer
332 // with given stepsize. The closed form expression is:
333 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
334 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
335 // and H(x) is the binary entropy function.
336 static const int rate_tab_q10[] = {
337 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
338 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
339 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
340 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
341 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
342 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
343 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
344 1159, 1086, 1021, 963, 911, 864, 821, 781,
345 745, 680, 623, 574, 530, 490, 455, 424,
346 395, 345, 304, 269, 239, 213, 190, 171,
347 154, 126, 104, 87, 73, 61, 52, 44,
348 38, 28, 21, 16, 12, 10, 8, 6,
    5, 3, 2, 1, 1, 1, 0, 0,
  };
351 // Normalized distortion
352 // This table models the normalized distortion for a Laplacian source
353 // source with given variance when quantized with a uniform quantizer
354 // with given stepsize. The closed form expression is:
355 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
356 // where x = qpstep / sqrt(variance)
357 // Note the actual distortion is Dn * variance.
358 static const int dist_tab_q10[] = {
359 0, 0, 1, 1, 1, 2, 2, 2,
360 3, 3, 4, 5, 5, 6, 7, 7,
361 8, 9, 11, 12, 13, 15, 16, 17,
362 18, 21, 24, 26, 29, 31, 34, 36,
363 39, 44, 49, 54, 59, 64, 69, 73,
364 78, 88, 97, 106, 115, 124, 133, 142,
365 151, 167, 184, 200, 215, 231, 245, 260,
366 274, 301, 327, 351, 375, 397, 418, 439,
367 458, 495, 528, 559, 587, 613, 637, 659,
368 680, 717, 749, 777, 801, 823, 842, 859,
369 874, 899, 919, 936, 949, 960, 969, 977,
370 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
373 static const int xsq_iq_q10[] = {
374 0, 4, 8, 12, 16, 20, 24, 28,
375 32, 40, 48, 56, 64, 72, 80, 88,
376 96, 112, 128, 144, 160, 176, 192, 208,
377 224, 256, 288, 320, 352, 384, 416, 448,
378 480, 544, 608, 672, 736, 800, 864, 928,
379 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
380 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
381 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
382 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
383 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
384 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
385 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
389 static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
390 assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
391 assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
392 assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
394 int tmp = (xsq_q10 >> 2) + 8;
395 int k = get_msb(tmp) - 3;
396 int xq = (k << 3) + ((tmp >> k) & 0x7);
397 const int one_q10 = 1 << 10;
398 const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
399 const int b_q10 = one_q10 - a_q10;
400 *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
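/* Illustrative sketch (not part of the encoder): the closed-form curves that
 * the Q10 tables above were sampled from can be evaluated directly in
 * floating point, using only the formulas quoted in the comments. The
 * function names here (binary_entropy, laplace_rate_bits, laplace_dist_norm)
 * are hypothetical, local to this disabled block, and rely on <math.h>. */
#if 0
static double binary_entropy(double p) {
  if (p <= 0.0 || p >= 1.0) return 0.0;
  return -p * log2(p) - (1.0 - p) * log2(1.0 - p);
}

/* Rn(x) = H(sqrt(r)) + sqrt(r) * [1 + H(r) / (1 - r)], r = exp(-sqrt(2) * x) */
static double laplace_rate_bits(double x) {
  const double r = exp(-sqrt(2.0) * x);
  if (r >= 1.0) return 65536.0 / 1024.0;  // x == 0: matches the table's first entry
  return binary_entropy(sqrt(r)) +
         sqrt(r) * (1.0 + binary_entropy(r) / (1.0 - r));
}

/* Dn(x) = 1 - (x / sqrt(2)) / sinh(x / sqrt(2)) */
static double laplace_dist_norm(double x) {
  const double y = x / sqrt(2.0);
  if (y == 0.0) return 0.0;
  return 1.0 - y / sinh(y);
}
#endif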
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    const uint64_t xsq_q10_64 =
        ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
    const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
                        MAX_XSQ_Q10 : (int)xsq_q10_64;
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = (n * r_q10 + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
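// Unit check for the model output (explanatory comment only): r_q10 is bits
// per sample in Q10, so (n * r_q10 + 2) >> 2 leaves *rate in Q8, i.e. in
// 1/256-bit units, which appears consistent with the bit-cost scale used by
// the token costing elsewhere in this file. Likewise d_q10 is the normalized
// distortion Dn in Q10, so *dist comes out in the same units as the variance.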
428 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
429 MACROBLOCK *x, MACROBLOCKD *xd,
430 int *out_rate_sum, int64_t *out_dist_sum) {
  // Note: our transform coefficients are 8 times those of an orthogonal
  // transform, so the quantizer step is also scaled by 8. To get the
  // effective quantizer we need to divide by 8 before calling the modeling
  // function.
435 int64_t rate_sum = 0;
436 int64_t dist_sum = 0;
437 const int ref = xd->mi[0]->mbmi.ref_frame[0];
440 for (i = 0; i < MAX_MB_PLANE; ++i) {
441 struct macroblock_plane *const p = &x->plane[i];
442 struct macroblockd_plane *const pd = &xd->plane[i];
443 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
445 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
446 pd->dst.buf, pd->dst.stride, &sse);
449 x->pred_sse[ref] = sse;
    // Fast approximation of the modelling function.
452 if (cpi->oxcf.speed > 4) {
455 int64_t square_error = sse;
456 int quantizer = (pd->dequant[1] >> 3);
459 rate = (square_error * (280 - quantizer)) >> 8;
462 dist = (square_error * quantizer) >> 8;
468 vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
469 pd->dequant[1] >> 3, &rate, &dist);
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}
479 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
481 MACROBLOCK *x, MACROBLOCKD *xd,
482 int *out_rate_sum, int64_t *out_dist_sum,
486 const struct macroblock_plane *const p = &x->plane[0];
487 const struct macroblockd_plane *const pd = &xd->plane[0];
488 const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
489 const int height = 4 * num_4x4_blocks_high_lookup[bsize];
491 int64_t dist_sum = 0;
492 const int t = 4 << tx_size;
494 if (tx_size == TX_4X4) {
496 } else if (tx_size == TX_8X8) {
498 } else if (tx_size == TX_16X16) {
500 } else if (tx_size == TX_32X32) {
507 for (j = 0; j < height; j += t) {
508 for (k = 0; k < width; k += t) {
512 cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
513 &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
515 // sse works better than var, since there is no dc prediction used
516 vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
520 *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}
int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
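// Reminder on units (comment only): both the returned SSE and *ssz are in the
// transform-coefficient domain. Since the forward transforms here scale the
// coefficients up (see the note in model_rd_for_sb above), callers such as
// dist_block() shift the result back down towards pixel-domain distortion.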
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * was non-zero). */
548 static const int16_t band_counts[TX_SIZES][8] = {
549 { 1, 2, 3, 4, 3, 16 - 13, 0 },
550 { 1, 2, 3, 4, 11, 64 - 21, 0 },
551 { 1, 2, 3, 4, 11, 256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
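// How to read band_counts (explanatory comment): each row gives the number of
// coefficients in successive coefficient bands for one transform size. For
// TX_4X4 the bands hold 1, 2, 3, 4 and 3 coefficients and the last band holds
// the remaining 16 - 13 = 3; the trailing 0 is the terminator described
// above, so cost_coeffs() stops decrementing band_left there.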
554 static INLINE int cost_coeffs(MACROBLOCK *x,
555 int plane, int block,
556 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
558 const int16_t *scan, const int16_t *nb,
559 int use_fast_coef_costing) {
560 MACROBLOCKD *const xd = &x->e_mbd;
561 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
562 const struct macroblock_plane *p = &x->plane[plane];
563 const struct macroblockd_plane *pd = &xd->plane[plane];
564 const PLANE_TYPE type = pd->plane_type;
565 const int16_t *band_count = &band_counts[tx_size][1];
566 const int eob = p->eobs[block];
567 const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
568 unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
569 x->token_costs[tx_size][type][is_inter_block(mbmi)];
570 uint8_t token_cache[32 * 32];
571 int pt = combine_entropy_contexts(*A, *L);
573 // Check for consistency of tx_size with mode info
574 assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
575 : get_uv_tx_size(mbmi) == tx_size);
579 cost = token_costs[0][0][pt][EOB_TOKEN];
582 int band_left = *band_count++;
586 int prev_t = vp9_dct_value_tokens_ptr[v].token;
587 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
588 token_cache[0] = vp9_pt_energy_class[prev_t];
592 for (c = 1; c < eob; c++) {
593 const int rc = scan[c];
597 t = vp9_dct_value_tokens_ptr[v].token;
598 if (use_fast_coef_costing) {
599 cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
601 pt = get_coef_context(nb, token_cache, c);
602 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
603 token_cache[rc] = vp9_pt_energy_class[t];
607 band_left = *band_count++;
614 if (use_fast_coef_costing) {
615 cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
617 pt = get_coef_context(nb, token_cache, c);
618 cost += (*token_costs)[0][pt][EOB_TOKEN];
623 // is eob first coefficient;
628 static void dist_block(int plane, int block, TX_SIZE tx_size,
629 struct rdcost_block_args* args) {
630 const int ss_txfrm_size = tx_size << 1;
631 MACROBLOCK* const x = args->x;
632 MACROBLOCKD* const xd = &x->e_mbd;
633 const struct macroblock_plane *const p = &x->plane[plane];
634 const struct macroblockd_plane *const pd = &xd->plane[plane];
636 int shift = tx_size == TX_32X32 ? 0 : 2;
637 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
638 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
639 args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
641 args->sse = this_sse >> shift;
643 if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
644 // TODO(jingning): tune the model to better capture the distortion.
645 int64_t p = (pd->dequant[1] * pd->dequant[1] *
646 (1 << ss_txfrm_size)) >> (shift + 2);
647 args->dist += (p >> 4);
652 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
653 TX_SIZE tx_size, struct rdcost_block_args* args) {
655 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
657 args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
658 args->t_left + y_idx, tx_size,
659 args->so->scan, args->so->neighbors,
660 args->use_fast_coef_costing);
663 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
664 TX_SIZE tx_size, void *arg) {
665 struct rdcost_block_args *args = arg;
666 MACROBLOCK *const x = args->x;
667 MACROBLOCKD *const xd = &x->e_mbd;
668 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
669 int64_t rd1, rd2, rd;
674 if (!is_inter_block(mbmi))
675 vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
677 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
679 dist_block(plane, block, tx_size, args);
680 rate_block(plane, block, plane_bsize, tx_size, args);
681 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
682 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
684 // TODO(jingning): temporarily enabled only for luma component
687 x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
688 (rd1 > rd2 && !xd->lossless);
690 args->this_rate += args->rate;
691 args->this_dist += args->dist;
692 args->this_sse += args->sse;
695 if (args->this_rd > args->best_rd) {
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;
  int i;

  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(0 && "Invalid transform size.");
  }
}
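// Note on the loads above (explanatory comment): entropy contexts are stored
// one byte per 4x4 column/row, so an 8x8 transform spans two bytes, a 16x16
// spans four and a 32x32 spans eight. Reading them as a single
// uint16/uint32/uint64 and collapsing with !! yields "any non-zero context in
// the covered area", which is the per-transform-block context that
// cost_coeffs() combines via combine_entropy_contexts().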
740 static void txfm_rd_in_plane(MACROBLOCK *x,
741 int *rate, int64_t *distortion,
742 int *skippable, int64_t *sse,
743 int64_t ref_best_rd, int plane,
744 BLOCK_SIZE bsize, TX_SIZE tx_size,
745 int use_fast_coef_casting) {
746 MACROBLOCKD *const xd = &x->e_mbd;
747 const struct macroblockd_plane *const pd = &xd->plane[plane];
748 struct rdcost_block_args args = { 0 };
750 args.best_rd = ref_best_rd;
751 args.use_fast_coef_costing = use_fast_coef_casting;
754 xd->mi[0]->mbmi.tx_size = tx_size;
756 vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
758 args.so = get_scan(xd, tx_size, pd->plane_type, 0);
760 vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
761 block_rd_txfm, &args);
764 *distortion = INT64_MAX;
768 *distortion = args.this_dist;
769 *rate = args.this_rate;
770 *sse = args.this_sse;
    *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
  }
}
775 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
776 int *rate, int64_t *distortion,
777 int *skip, int64_t *sse,
780 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
781 VP9_COMMON *const cm = &cpi->common;
782 const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
783 MACROBLOCKD *const xd = &x->e_mbd;
784 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
786 mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
788 txfm_rd_in_plane(x, rate, distortion, skip,
789 &sse[mbmi->tx_size], ref_best_rd, 0, bs,
790 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
791 cpi->tx_stepdown_count[0]++;
794 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
795 int (*r)[2], int *rate,
796 int64_t *d, int64_t *distortion,
798 int64_t tx_cache[TX_MODES],
800 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
801 VP9_COMMON *const cm = &cpi->common;
802 MACROBLOCKD *const xd = &x->e_mbd;
803 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
804 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
805 int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
806 {INT64_MAX, INT64_MAX},
807 {INT64_MAX, INT64_MAX},
808 {INT64_MAX, INT64_MAX}};
811 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
812 int64_t best_rd = INT64_MAX;
813 TX_SIZE best_tx = TX_4X4;
815 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
816 assert(skip_prob > 0);
817 s0 = vp9_cost_bit(skip_prob, 0);
818 s1 = vp9_cost_bit(skip_prob, 1);
820 for (n = TX_4X4; n <= max_tx_size; n++) {
822 if (r[n][0] < INT_MAX) {
823 for (m = 0; m <= n - (n == max_tx_size); m++) {
825 r[n][1] += vp9_cost_zero(tx_probs[m]);
827 r[n][1] += vp9_cost_one(tx_probs[m]);
830 if (d[n] == INT64_MAX) {
831 rd[n][0] = rd[n][1] = INT64_MAX;
833 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
835 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
836 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
839 if (rd[n][1] < best_rd) {
844 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
845 best_tx : MIN(max_tx_size, max_mode_tx_size);
848 *distortion = d[mbmi->tx_size];
849 *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
850 *skip = s[mbmi->tx_size];
852 tx_cache[ONLY_4X4] = rd[TX_4X4][0];
853 tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
854 tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
855 tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
857 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
858 tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
859 cpi->tx_stepdown_count[0]++;
860 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
861 tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
862 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
863 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
864 tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
865 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
867 tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
868 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
872 static int64_t scaled_rd_cost(int rdmult, int rddiv,
873 int rate, int64_t dist, double scale) {
874 return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
877 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
878 int (*r)[2], int *rate,
879 int64_t *d, int64_t *distortion,
880 int *s, int *skip, int64_t *sse,
883 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
884 VP9_COMMON *const cm = &cpi->common;
885 MACROBLOCKD *const xd = &x->e_mbd;
886 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
887 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
888 int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
889 {INT64_MAX, INT64_MAX},
890 {INT64_MAX, INT64_MAX},
891 {INT64_MAX, INT64_MAX}};
894 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
895 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
896 int64_t best_rd = INT64_MAX;
897 TX_SIZE best_tx = TX_4X4;
899 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
900 assert(skip_prob > 0);
901 s0 = vp9_cost_bit(skip_prob, 0);
902 s1 = vp9_cost_bit(skip_prob, 1);
904 for (n = TX_4X4; n <= max_tx_size; n++) {
905 double scale = scale_rd[n];
907 for (m = 0; m <= n - (n == max_tx_size); m++) {
909 r[n][1] += vp9_cost_zero(tx_probs[m]);
911 r[n][1] += vp9_cost_one(tx_probs[m]);
914 rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
917 rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
919 rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
922 if (rd[n][1] < best_rd) {
928 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
929 best_tx : MIN(max_tx_size, max_mode_tx_size);
931 // Actually encode using the chosen mode if a model was used, but do not
932 // update the r, d costs
933 txfm_rd_in_plane(x, rate, distortion, skip,
934 &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
935 cpi->sf.use_fast_coef_costing);
937 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
938 cpi->tx_stepdown_count[0]++;
939 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
940 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
941 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
942 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
944 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
948 static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
949 int64_t *distortion, int *skip,
950 int64_t *psse, BLOCK_SIZE bs,
951 int64_t txfm_cache[TX_MODES],
952 int64_t ref_best_rd) {
953 int r[TX_SIZES][2], s[TX_SIZES];
954 int64_t d[TX_SIZES], sse[TX_SIZES];
955 MACROBLOCKD *xd = &x->e_mbd;
956 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
957 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
960 assert(bs == mbmi->sb_type);
962 vp9_subtract_plane(x, bs, 0);
964 if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
965 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
966 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
969 *psse = sse[mbmi->tx_size];
973 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
974 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
975 model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
976 &r[tx_size][0], &d[tx_size], &s[tx_size]);
977 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
978 skip, sse, ref_best_rd, bs);
980 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
981 txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
982 &s[tx_size], &sse[tx_size],
983 ref_best_rd, 0, bs, tx_size,
984 cpi->sf.use_fast_coef_costing);
985 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
986 skip, txfm_cache, bs);
989 *psse = sse[mbmi->tx_size];
992 static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
993 int64_t *distortion, int *skip,
994 int64_t *psse, BLOCK_SIZE bs,
995 int64_t txfm_cache[TX_MODES],
996 int64_t ref_best_rd) {
997 int64_t sse[TX_SIZES];
998 MACROBLOCKD *xd = &x->e_mbd;
999 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1001 assert(bs == mbmi->sb_type);
1002 if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
1003 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
1004 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
1007 int r[TX_SIZES][2], s[TX_SIZES];
1008 int64_t d[TX_SIZES];
1010 for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
1011 txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
1012 &s[tx_size], &sse[tx_size],
1013 ref_best_rd, 0, bs, tx_size,
1014 cpi->sf.use_fast_coef_costing);
1015 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
1016 skip, txfm_cache, bs);
1019 *psse = sse[mbmi->tx_size];
1023 static int conditional_skipintra(PREDICTION_MODE mode,
1024 PREDICTION_MODE best_intra_mode) {
1025 if (mode == D117_PRED &&
1026 best_intra_mode != V_PRED &&
1027 best_intra_mode != D135_PRED)
1029 if (mode == D63_PRED &&
1030 best_intra_mode != V_PRED &&
1031 best_intra_mode != D45_PRED)
1033 if (mode == D207_PRED &&
1034 best_intra_mode != H_PRED &&
1035 best_intra_mode != D45_PRED)
1037 if (mode == D153_PRED &&
1038 best_intra_mode != H_PRED &&
1039 best_intra_mode != D135_PRED)
1044 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
1045 PREDICTION_MODE *best_mode,
1046 const int *bmode_costs,
1047 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1048 int *bestrate, int *bestratey,
1049 int64_t *bestdistortion,
1050 BLOCK_SIZE bsize, int64_t rd_thresh) {
1051 PREDICTION_MODE mode;
1052 MACROBLOCKD *const xd = &x->e_mbd;
1053 int64_t best_rd = rd_thresh;
1055 struct macroblock_plane *p = &x->plane[0];
1056 struct macroblockd_plane *pd = &xd->plane[0];
1057 const int src_stride = p->src.stride;
1058 const int dst_stride = pd->dst.stride;
1059 const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
1061 uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
1063 ENTROPY_CONTEXT ta[2], tempa[2];
1064 ENTROPY_CONTEXT tl[2], templ[2];
1066 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1067 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1069 uint8_t best_dst[8 * 8];
1073 vpx_memcpy(ta, a, sizeof(ta));
1074 vpx_memcpy(tl, l, sizeof(tl));
1075 xd->mi[0]->mbmi.tx_size = TX_4X4;
1077 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1080 int64_t distortion = 0;
1081 int rate = bmode_costs[mode];
1083 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1086 // Only do the oblique modes if the best so far is
1087 // one of the neighboring directional modes
1088 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1089 if (conditional_skipintra(mode, *best_mode))
1093 vpx_memcpy(tempa, ta, sizeof(ta));
1094 vpx_memcpy(templ, tl, sizeof(tl));
1096 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1097 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1098 const int block = ib + idy * 2 + idx;
1099 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1100 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1101 int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
1103 int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1104 xd->mi[0]->bmi[block].as_mode = mode;
1105 vp9_predict_intra_block(xd, block, 1,
1107 x->skip_encode ? src : dst,
1108 x->skip_encode ? src_stride : dst_stride,
1109 dst, dst_stride, idx, idy, 0);
1110 vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1113 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1114 vp9_fwht4x4(src_diff, coeff, 8);
1115 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1116 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1117 so->scan, so->neighbors,
1118 cpi->sf.use_fast_coef_costing);
1119 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1121 vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
1125 const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
1126 const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
1127 vp9_fht4x4(src_diff, coeff, 8, tx_type);
1128 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1129 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1130 so->scan, so->neighbors,
1131 cpi->sf.use_fast_coef_costing);
1132 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1134 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1136 vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
1137 dst, dst_stride, p->eobs[block]);
1143 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1145 if (this_rd < best_rd) {
1148 *bestdistortion = distortion;
1151 vpx_memcpy(a, tempa, sizeof(tempa));
1152 vpx_memcpy(l, templ, sizeof(templ));
1153 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1154 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1155 num_4x4_blocks_wide * 4);
1161 if (best_rd >= rd_thresh || x->skip_encode)
1164 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1165 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1166 num_4x4_blocks_wide * 4);
1171 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
1172 int *rate, int *rate_y,
1173 int64_t *distortion,
1176 const MACROBLOCKD *const xd = &mb->e_mbd;
1177 MODE_INFO *const mic = xd->mi[0];
1178 const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1179 const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1180 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1181 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1182 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1185 int64_t total_distortion = 0;
1187 int64_t total_rd = 0;
1188 ENTROPY_CONTEXT t_above[4], t_left[4];
1189 const int *bmode_costs = cpi->mbmode_cost;
1191 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1192 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1194 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1195 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1196 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1197 PREDICTION_MODE best_mode = DC_PRED;
1198 int r = INT_MAX, ry = INT_MAX;
1199 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1201 if (cpi->common.frame_type == KEY_FRAME) {
1202 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
1203 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
1205 bmode_costs = cpi->y_mode_costs[A][L];
1208 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1209 t_above + idx, t_left + idy, &r, &ry, &d,
1210 bsize, best_rd - total_rd);
1211 if (this_rd >= best_rd - total_rd)
1214 total_rd += this_rd;
1216 total_distortion += d;
1219 mic->bmi[i].as_mode = best_mode;
1220 for (j = 1; j < num_4x4_blocks_high; ++j)
1221 mic->bmi[i + j * 2].as_mode = best_mode;
1222 for (j = 1; j < num_4x4_blocks_wide; ++j)
1223 mic->bmi[i + j].as_mode = best_mode;
1225 if (total_rd >= best_rd)
1231 *rate_y = tot_rate_y;
1232 *distortion = total_distortion;
1233 mic->mbmi.mode = mic->bmi[3].as_mode;
1235 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1238 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1239 int *rate, int *rate_tokenonly,
1240 int64_t *distortion, int *skippable,
1242 int64_t tx_cache[TX_MODES],
1244 PREDICTION_MODE mode;
1245 PREDICTION_MODE mode_selected = DC_PRED;
1246 MACROBLOCKD *const xd = &x->e_mbd;
1247 MODE_INFO *const mic = xd->mi[0];
1248 int this_rate, this_rate_tokenonly, s;
1249 int64_t this_distortion, this_rd;
1250 TX_SIZE best_tx = TX_4X4;
1252 int *bmode_costs = cpi->mbmode_cost;
1254 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1255 for (i = 0; i < TX_MODES; i++)
1256 tx_cache[i] = INT64_MAX;
1258 /* Y Search for intra prediction mode */
1259 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1260 int64_t local_tx_cache[TX_MODES];
1261 MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1262 MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1264 if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
1267 if (cpi->common.frame_type == KEY_FRAME) {
1268 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1269 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1271 bmode_costs = cpi->y_mode_costs[A][L];
1273 mic->mbmi.mode = mode;
1275 intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
1276 &s, NULL, bsize, local_tx_cache, best_rd);
1278 if (this_rate_tokenonly == INT_MAX)
1281 this_rate = this_rate_tokenonly + bmode_costs[mode];
1282 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1284 if (this_rd < best_rd) {
1285 mode_selected = mode;
1287 best_tx = mic->mbmi.tx_size;
1289 *rate_tokenonly = this_rate_tokenonly;
1290 *distortion = this_distortion;
1294 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1295 for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1296 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1297 local_tx_cache[cpi->common.tx_mode];
1298 if (adj_rd < tx_cache[i]) {
1299 tx_cache[i] = adj_rd;
1305 mic->mbmi.mode = mode_selected;
1306 mic->mbmi.tx_size = best_tx;
1311 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
1312 int *rate, int64_t *distortion, int *skippable,
1313 int64_t *sse, BLOCK_SIZE bsize,
1314 int64_t ref_best_rd) {
1315 MACROBLOCKD *const xd = &x->e_mbd;
1316 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1317 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1319 int pnrate = 0, pnskip = 1;
1320 int64_t pndist = 0, pnsse = 0;
1322 if (ref_best_rd < 0)
1325 if (is_inter_block(mbmi)) {
1327 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
1328 vp9_subtract_plane(x, bsize, plane);
1336 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1337 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
1338 ref_best_rd, plane, bsize, uv_txfm_size,
1339 cpi->sf.use_fast_coef_costing);
1340 if (pnrate == INT_MAX)
1343 *distortion += pndist;
1345 *skippable &= pnskip;
1351 *distortion = INT64_MAX;
1357 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1358 PICK_MODE_CONTEXT *ctx,
1359 int *rate, int *rate_tokenonly,
1360 int64_t *distortion, int *skippable,
1361 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
1362 MACROBLOCKD *xd = &x->e_mbd;
1363 PREDICTION_MODE mode;
1364 PREDICTION_MODE mode_selected = DC_PRED;
1365 int64_t best_rd = INT64_MAX, this_rd;
1366 int this_rate_tokenonly, this_rate, s;
1367 int64_t this_distortion, this_sse;
1369 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1370 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
1373 xd->mi[0]->mbmi.uv_mode = mode;
1375 super_block_uvrd(cpi, x, &this_rate_tokenonly,
1376 &this_distortion, &s, &this_sse, bsize, best_rd);
1377 if (this_rate_tokenonly == INT_MAX)
1379 this_rate = this_rate_tokenonly +
1380 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
1381 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1383 if (this_rd < best_rd) {
1384 mode_selected = mode;
1387 *rate_tokenonly = this_rate_tokenonly;
1388 *distortion = this_distortion;
1390 if (!x->select_txfm_size) {
1392 struct macroblock_plane *const p = x->plane;
1393 struct macroblockd_plane *const pd = xd->plane;
1394 for (i = 1; i < MAX_MB_PLANE; ++i) {
1395 p[i].coeff = ctx->coeff_pbuf[i][2];
1396 p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
1397 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1398 p[i].eobs = ctx->eobs_pbuf[i][2];
1400 ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
1401 ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
1402 ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
1403 ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
1405 ctx->coeff_pbuf[i][0] = p[i].coeff;
1406 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
1407 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
1408 ctx->eobs_pbuf[i][0] = p[i].eobs;
1414 xd->mi[0]->mbmi.uv_mode = mode_selected;
1418 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
1419 int *rate, int *rate_tokenonly,
1420 int64_t *distortion, int *skippable,
1422 const VP9_COMMON *cm = &cpi->common;
1425 x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
1426 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
1427 skippable, &unused, bsize, INT64_MAX);
1428 *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
1429 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1432 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1433 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1434 int *rate_uv, int *rate_uv_tokenonly,
1435 int64_t *dist_uv, int *skip_uv,
1436 PREDICTION_MODE *mode_uv) {
1437 MACROBLOCK *const x = &cpi->mb;
1439 // Use an estimated rd for uv_intra based on DC_PRED if the
1440 // appropriate speed flag is set.
1441 if (cpi->sf.use_uv_intra_rd_estimate) {
1442 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
1443 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1444 // Else do a proper rd search for each possible transform size that may
1445 // be considered in the main rd loop.
1447 rd_pick_intra_sbuv_mode(cpi, x, ctx,
1448 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1449 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
1451 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
1454 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1456 const MACROBLOCK *const x = &cpi->mb;
1457 const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id;
1459 // Don't account for mode here if segment skip is enabled.
1460 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1461 assert(is_inter_mode(mode));
1462 return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1468 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1471 int mi_row, int mi_col,
1472 int_mv single_newmv[MAX_REF_FRAMES],
1475 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
1476 PREDICTION_MODE mode, int_mv this_mv[2],
1477 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1478 int_mv seg_mvs[MAX_REF_FRAMES],
1479 int_mv *best_ref_mv[2], const int *mvjcost,
1481 MODE_INFO *const mic = xd->mi[0];
1482 const MB_MODE_INFO *const mbmi = &mic->mbmi;
1485 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1486 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1487 const int is_compound = has_second_ref(mbmi);
1491 this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1492 thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1493 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1495 this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1496 thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1497 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1502 this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
1504 this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
1507 this_mv[0].as_int = 0;
1509 this_mv[1].as_int = 0;
1515 mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1517 mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1519 mic->bmi[i].as_mode = mode;
1521 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1522 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1523 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1524 &mic->bmi[i], sizeof(mic->bmi[i]));
1526 return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
1530 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1535 int64_t *distortion, int64_t *sse,
1536 ENTROPY_CONTEXT *ta,
1537 ENTROPY_CONTEXT *tl,
1538 int mi_row, int mi_col) {
1540 MACROBLOCKD *xd = &x->e_mbd;
1541 struct macroblockd_plane *const pd = &xd->plane[0];
1542 struct macroblock_plane *const p = &x->plane[0];
1543 MODE_INFO *const mi = xd->mi[0];
1544 const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1545 const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1546 const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1549 const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1551 uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1553 int64_t thisdistortion = 0, thissse = 0;
1554 int thisrate = 0, ref;
1555 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1556 const int is_compound = has_second_ref(&mi->mbmi);
1557 const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
1559 for (ref = 0; ref < 1 + is_compound; ++ref) {
1560 const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1561 pd->pre[ref].stride)];
1562 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1563 dst, pd->dst.stride,
1564 &mi->bmi[i].as_mv[ref].as_mv,
1565 &xd->block_refs[ref]->sf, width, height, ref,
1566 kernel, MV_PRECISION_Q3,
1567 mi_col * MI_SIZE + 4 * (i % 2),
1568 mi_row * MI_SIZE + 4 * (i / 2));
1571 vp9_subtract_block(height, width,
1572 raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1574 dst, pd->dst.stride);
1577 for (idy = 0; idy < height / 4; ++idy) {
1578 for (idx = 0; idx < width / 4; ++idx) {
1579 int64_t ssz, rd, rd1, rd2;
1582 k += (idy * 2 + idx);
1583 coeff = BLOCK_OFFSET(p->coeff, k);
1584 x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1586 vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1587 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1590 thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1591 so->scan, so->neighbors,
1592 cpi->sf.use_fast_coef_costing);
1593 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1594 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1601 *distortion = thisdistortion >> 2;
1602 *labelyrate = thisrate;
1603 *sse = thissse >> 2;
1605 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1616 ENTROPY_CONTEXT ta[2];
1617 ENTROPY_CONTEXT tl[2];
1629 PREDICTION_MODE modes[4];
1630 SEG_RDSTAT rdstat[4][INTER_MODES];
1634 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
1635 return (mv->row >> 3) < x->mv_row_min ||
1636 (mv->row >> 3) > x->mv_row_max ||
1637 (mv->col >> 3) < x->mv_col_min ||
         (mv->col >> 3) > x->mv_col_max;
}
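// The >> 3 above converts from 1/8-pel motion vector units to whole pels
// before comparing against the search bounds; a non-zero return means the
// vector reaches outside the allowed UMV border (see the "Trap vectors"
// check in rd_pick_best_sub8x8_mode below).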
1641 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1642 MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
1643 struct macroblock_plane *const p = &x->plane[0];
1644 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1646 p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1647 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1648 pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1649 pd->pre[0].stride)];
1650 if (has_second_ref(mbmi))
1651 pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1652 pd->pre[1].stride)];
1655 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1656 struct buf_2d orig_pre[2]) {
1657 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
1658 x->plane[0].src = orig_src;
1659 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1660 if (has_second_ref(mbmi))
1661 x->e_mbd.plane[0].pre[1] = orig_pre[1];
1664 static INLINE int mv_has_subpel(const MV *mv) {
1665 return (mv->row & 0x0F) || (mv->col & 0x0F);
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
1669 // TODO(aconverse): Find out if this is still productive then clean up or remove
1670 static int check_best_zero_mv(
1671 const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
1672 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1673 int disable_inter_mode_mask, int this_mode,
1674 const MV_REFERENCE_FRAME ref_frames[2]) {
1675 if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
1676 (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1677 frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1678 (ref_frames[1] == NONE ||
1679 frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1680 int rfc = mode_context[ref_frames[0]];
1681 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1682 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1683 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1685 if (this_mode == NEARMV) {
1686 if (c1 > c3) return 0;
1687 } else if (this_mode == NEARESTMV) {
1688 if (c2 > c3) return 0;
1690 assert(this_mode == ZEROMV);
1691 if (ref_frames[1] == NONE) {
1692 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1693 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1696 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1697 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1698 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1699 frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1707 static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
1708 const TileInfo * const tile,
1709 int_mv *best_ref_mv,
1710 int_mv *second_best_ref_mv,
1711 int64_t best_rd, int *returntotrate,
1713 int64_t *returndistortion,
1714 int *skippable, int64_t *psse,
1716 int_mv seg_mvs[4][MAX_REF_FRAMES],
1717 BEST_SEG_INFO *bsi_buf, int filter_idx,
1718 int mi_row, int mi_col) {
1720 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1721 MACROBLOCKD *xd = &x->e_mbd;
1722 MODE_INFO *mi = xd->mi[0];
1723 MB_MODE_INFO *mbmi = &mi->mbmi;
1725 int k, br = 0, idx, idy;
1726 int64_t bd = 0, block_sse = 0;
1727 PREDICTION_MODE this_mode;
1728 VP9_COMMON *cm = &cpi->common;
1729 struct macroblock_plane *const p = &x->plane[0];
1730 struct macroblockd_plane *const pd = &xd->plane[0];
1731 const int label_count = 4;
1732 int64_t this_segment_rd = 0;
1733 int label_mv_thresh;
1734 int segmentyrate = 0;
1735 const BLOCK_SIZE bsize = mbmi->sb_type;
1736 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1737 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1738 vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
1739 ENTROPY_CONTEXT t_above[2], t_left[2];
1740 int subpelmv = 1, have_ref = 0;
1741 const int has_second_rf = has_second_ref(mbmi);
1742 const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
1746 bsi->segment_rd = best_rd;
1747 bsi->ref_mv[0] = best_ref_mv;
1748 bsi->ref_mv[1] = second_best_ref_mv;
1749 bsi->mvp.as_int = best_ref_mv->as_int;
1750 bsi->mvthresh = mvthresh;
1752 for (i = 0; i < 4; i++)
1753 bsi->modes[i] = ZEROMV;
1755 vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1756 vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
  // A value of 64 makes this threshold effectively so large that motion
  // vectors are very rarely checked on segments. Setting it to 1 would make
  // the mv threshold roughly equal to what it is for whole macroblocks.
1762 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1764 // Segmentation method overheads
1765 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1766 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1767 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1768 // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1769 int_mv mode_mv[MB_MODE_COUNT][2];
1770 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1771 PREDICTION_MODE mode_selected = ZEROMV;
1772 int64_t best_rd = INT64_MAX;
1773 const int i = idy * 2 + idx;
1776 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1777 const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1778 frame_mv[ZEROMV][frame].as_int = 0;
1779 vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1780 &frame_mv[NEARESTMV][frame],
1781 &frame_mv[NEARMV][frame]);
1784 // search for the best motion vector on this segment
1785 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1786 const struct buf_2d orig_src = x->plane[0].src;
1787 struct buf_2d orig_pre[2];
1789 mode_idx = INTER_OFFSET(this_mode);
1790 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1791 if (disable_inter_mode_mask & (1 << mode_idx))
1794 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1795 disable_inter_mode_mask,
1796 this_mode, mbmi->ref_frame))
1799 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1800 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1801 sizeof(bsi->rdstat[i][mode_idx].ta));
1802 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1803 sizeof(bsi->rdstat[i][mode_idx].tl));
1805 // motion search for newmv (single predictor case only)
1806 if (!has_second_rf && this_mode == NEWMV &&
1807 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1808 MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1811 int thissme, bestsme = INT_MAX;
1812 int sadpb = x->sadperbit4;
          /* Is the best so far sufficiently good that we can't justify doing
           * a new motion search? */
1818 if (best_rd < label_mv_thresh)
1821 if (!is_best_mode(cpi->oxcf.mode)) {
1822 // use previous block's result as next block's MV predictor.
1824 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1826 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1830 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1832 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1834 if (cpi->sf.auto_mv_step_size && cm->show_frame) {
1835 // Take wtd average of the step_params based on the last frame's
1836 // max mv magnitude and the best ref mvs of the current block for
1837 // the given reference.
1838 step_param = (vp9_init_search_range(cpi, max_mv) +
1839 cpi->mv_step_param) >> 1;
1841 step_param = cpi->mv_step_param;
1844 mvp_full.row = bsi->mvp.as_mv.row >> 3;
1845 mvp_full.col = bsi->mvp.as_mv.col >> 3;
1847 if (cpi->sf.adaptive_motion_search && cm->show_frame) {
1848 mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1849 mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1850 step_param = MAX(step_param, 8);
1853 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1854 // adjust src pointer for this block
1857 vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1859 if (cpi->sf.search_method == HEX) {
1860 bestsme = vp9_hex_search(x, &mvp_full,
1862 sadpb, 1, v_fn_ptr, 1,
1863 &bsi->ref_mv[0]->as_mv,
1865 if (bestsme < INT_MAX)
1866 bestsme = vp9_get_mvpred_var(x, new_mv,
1867 &bsi->ref_mv[0]->as_mv,
1869 } else if (cpi->sf.search_method == SQUARE) {
1870 bestsme = vp9_square_search(x, &mvp_full,
1872 sadpb, 1, v_fn_ptr, 1,
1873 &bsi->ref_mv[0]->as_mv,
1875 if (bestsme < INT_MAX)
1876 bestsme = vp9_get_mvpred_var(x, new_mv,
1877 &bsi->ref_mv[0]->as_mv,
1879 } else if (cpi->sf.search_method == BIGDIA) {
1880 bestsme = vp9_bigdia_search(x, &mvp_full,
1882 sadpb, 1, v_fn_ptr, 1,
1883 &bsi->ref_mv[0]->as_mv,
1885 if (bestsme < INT_MAX)
1886 bestsme = vp9_get_mvpred_var(x, new_mv,
1887 &bsi->ref_mv[0]->as_mv,
1890 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1891 sadpb, further_steps, 0, v_fn_ptr,
1892 &bsi->ref_mv[0]->as_mv,
1896 // Should we do a full search (best quality only)
1897 if (is_best_mode(cpi->oxcf.mode)) {
1898 int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1899 /* Check if mvp_full is within the range. */
1900 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1901 x->mv_row_min, x->mv_row_max);
1902 thissme = cpi->full_search_sad(x, &mvp_full,
1903 sadpb, 16, v_fn_ptr,
1904 &bsi->ref_mv[0]->as_mv,
1906 if (thissme < bestsme) {
1908 *new_mv = best_mv->as_mv;
1910 // The full search result is actually worse, so reinstate the
1911 // previous best vector.
1912 best_mv->as_mv = *new_mv;
1916 if (bestsme < INT_MAX) {
1918 cpi->find_fractional_mv_step(x,
1920 &bsi->ref_mv[0]->as_mv,
1921 cm->allow_high_precision_mv,
1922 x->errorperbit, v_fn_ptr,
1923 cpi->sf.subpel_force_stop,
1924 cpi->sf.subpel_iters_per_step,
1925 x->nmvjointcost, x->mvcost,
1927 &x->pred_sse[mbmi->ref_frame[0]]);
1929 // save motion search result for use in compound prediction
1930 seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
1933 if (cpi->sf.adaptive_motion_search)
1934 x->pred_mv[mbmi->ref_frame[0]].as_mv = *new_mv;
1936 // restore src pointers
1937 mi_buf_restore(x, orig_src, orig_pre);
1940 if (has_second_rf) {
1941 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1942 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1946 if (has_second_rf && this_mode == NEWMV &&
1947 mbmi->interp_filter == EIGHTTAP) {
1948 // adjust src pointers
1950 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1952 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1953 mi_row, mi_col, seg_mvs[i],
1955 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1956 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1957 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1958 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1960 // restore src pointers
1961 mi_buf_restore(x, orig_src, orig_pre);
1964 bsi->rdstat[i][mode_idx].brate =
1965 set_and_cost_bmi_mvs(cpi, xd, i, this_mode, mode_mv[this_mode],
1966 frame_mv, seg_mvs[i], bsi->ref_mv,
1967 x->nmvjointcost, x->mvcost);
1969 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1970 bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1971 mode_mv[this_mode][ref].as_int;
1972 if (num_4x4_blocks_wide > 1)
1973 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1974 mode_mv[this_mode][ref].as_int;
1975 if (num_4x4_blocks_high > 1)
1976 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1977 mode_mv[this_mode][ref].as_int;
1980 // Trap vectors that reach beyond the UMV borders
1981 if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1983 mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
1986 if (filter_idx > 0) {
1987 BEST_SEG_INFO *ref_bsi = bsi_buf;
1991 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1992 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1993 have_ref &= mode_mv[this_mode][ref].as_int ==
1994 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1997 if (filter_idx > 1 && !subpelmv && !have_ref) {
1998 ref_bsi = bsi_buf + 1;
2000 for (ref = 0; ref < 1 + has_second_rf; ++ref)
2001 have_ref &= mode_mv[this_mode][ref].as_int ==
2002 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2005 if (!subpelmv && have_ref &&
2006 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2007 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
2008 sizeof(SEG_RDSTAT));
2009 if (num_4x4_blocks_wide > 1)
2010 bsi->rdstat[i + 1][mode_idx].eobs =
2011 ref_bsi->rdstat[i + 1][mode_idx].eobs;
2012 if (num_4x4_blocks_high > 1)
2013 bsi->rdstat[i + 2][mode_idx].eobs =
2014 ref_bsi->rdstat[i + 2][mode_idx].eobs;
2016 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2017 mode_selected = this_mode;
2018 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2024 bsi->rdstat[i][mode_idx].brdcost =
2025 encode_inter_mb_segment(cpi, x,
2026 bsi->segment_rd - this_segment_rd, i,
2027 &bsi->rdstat[i][mode_idx].byrate,
2028 &bsi->rdstat[i][mode_idx].bdist,
2029 &bsi->rdstat[i][mode_idx].bsse,
2030 bsi->rdstat[i][mode_idx].ta,
2031 bsi->rdstat[i][mode_idx].tl,
2033 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2034 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2035 bsi->rdstat[i][mode_idx].brate, 0);
2036 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2037 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2038 if (num_4x4_blocks_wide > 1)
2039 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2040 if (num_4x4_blocks_high > 1)
2041 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
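// At this point brdcost holds the residual coding cost of this 4x4 group;
// the RDCOST(..., brate, 0) term above folds in the rate of signaling the
// mode and motion vector(s), and the eobs are mirrored into the extra 4x4
// positions covered by 8x4 / 4x8 partitions so later stages see a
// consistent per-block record.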
2044 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2045 mode_selected = this_mode;
2046 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2048 } /*for each 4x4 mode*/
2050 if (best_rd == INT64_MAX) {
2052 for (iy = i + 1; iy < 4; ++iy)
2053 for (midx = 0; midx < INTER_MODES; ++midx)
2054 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2055 bsi->segment_rd = INT64_MAX;
2059 mode_idx = INTER_OFFSET(mode_selected);
2060 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2061 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2063 set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected],
2064 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2067 br += bsi->rdstat[i][mode_idx].brate;
2068 bd += bsi->rdstat[i][mode_idx].bdist;
2069 block_sse += bsi->rdstat[i][mode_idx].bsse;
2070 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2071 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2073 if (this_segment_rd > bsi->segment_rd) {
2075 for (iy = i + 1; iy < 4; ++iy)
2076 for (midx = 0; midx < INTER_MODES; ++midx)
2077 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2078 bsi->segment_rd = INT64_MAX;
2082 } /* for each label */
2086 bsi->segment_yrate = segmentyrate;
2087 bsi->segment_rd = this_segment_rd;
2088 bsi->sse = block_sse;
2090 // update the coding decisions
2091 for (k = 0; k < 4; ++k)
2092 bsi->modes[k] = mi->bmi[k].as_mode;
2094 if (bsi->segment_rd > best_rd)
2096 /* set it to the best */
2097 for (i = 0; i < 4; i++) {
2098 mode_idx = INTER_OFFSET(bsi->modes[i]);
2099 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2100 if (has_second_ref(mbmi))
2101 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2102 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2103 mi->bmi[i].as_mode = bsi->modes[i];
2107 * used to set mbmi->mv.as_int
2109 *returntotrate = bsi->r;
2110 *returndistortion = bsi->d;
2111 *returnyrate = bsi->segment_yrate;
2112 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2114 mbmi->mode = bsi->modes[3];
2116 return bsi->segment_rd;
2119 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2120 uint8_t *ref_y_buffer, int ref_y_stride,
2121 int ref_frame, BLOCK_SIZE block_size ) {
2122 MACROBLOCKD *xd = &x->e_mbd;
2123 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2128 int best_sad = INT_MAX;
2129 int this_sad = INT_MAX;
2132 uint8_t *src_y_ptr = x->plane[0].src.buf;
2134 int row_offset, col_offset;
2135 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2136 (cpi->sf.adaptive_motion_search &&
2137 cpi->common.show_frame &&
2138 block_size < cpi->sf.max_partition_size);
2141 pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
2142 pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
2143 pred_mv[2] = x->pred_mv[ref_frame];
2145 // Get the sad for each candidate reference mv
2146 for (i = 0; i < num_mv_refs; i++) {
2147 this_mv.as_int = pred_mv[i].as_int;
2149 max_mv = MAX(max_mv,
2150 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
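// Motion vectors are stored in 1/8-pel units, so the >> 3 gives the
// full-pel magnitude; max_mv ends up as the largest full-pel component
// seen among the candidates and later bounds the motion search range for
// this reference frame (via x->max_mv_context).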
2151 // only need to check zero mv once
2152 if (!this_mv.as_int && zero_seen)
2155 zero_seen = zero_seen || !this_mv.as_int;
2157 row_offset = this_mv.as_mv.row >> 3;
2158 col_offset = this_mv.as_mv.col >> 3;
2159 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2161 // Find sad for current vector.
2162 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2163 ref_y_ptr, ref_y_stride,
2166 // Note if it is the best so far.
2167 if (this_sad < best_sad) {
2168 best_sad = this_sad;
2173 // Note the index of the mv that worked best in the reference list.
2174 x->mv_best_ref_index[ref_frame] = best_index;
2175 x->max_mv_context[ref_frame] = max_mv;
2176 x->pred_mv_sad[ref_frame] = best_sad;
2179 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
2180 const MACROBLOCKD *xd,
2182 unsigned int *ref_costs_single,
2183 unsigned int *ref_costs_comp,
2184 vp9_prob *comp_mode_p) {
2185 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2187 if (seg_ref_active) {
2188 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2189 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2192 vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
2193 vp9_prob comp_inter_p = 128;
2195 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2196 comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2197 *comp_mode_p = comp_inter_p;
2202 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2204 if (cm->reference_mode != COMPOUND_REFERENCE) {
2205 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2206 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2207 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2209 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2210 base_cost += vp9_cost_bit(comp_inter_p, 0);
2212 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2213 ref_costs_single[ALTREF_FRAME] = base_cost;
2214 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2215 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2216 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2217 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2218 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
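// This mirrors the bitstream's single-reference tree: ref_single_p1
// separates LAST from {GOLDEN, ALTREF} and ref_single_p2 then separates
// GOLDEN from ALTREF, so LAST pays one signaling bit on top of base_cost
// while GOLDEN and ALTREF pay two. Costs are in the encoder's fixed-point
// bit units (roughly 256 per bit), which is why the fallback below charges
// a flat 512, i.e. about two bits, per reference.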
2220 ref_costs_single[LAST_FRAME] = 512;
2221 ref_costs_single[GOLDEN_FRAME] = 512;
2222 ref_costs_single[ALTREF_FRAME] = 512;
2224 if (cm->reference_mode != SINGLE_REFERENCE) {
2225 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2226 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2228 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2229 base_cost += vp9_cost_bit(comp_inter_p, 1);
2231 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2232 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2234 ref_costs_comp[LAST_FRAME] = 512;
2235 ref_costs_comp[GOLDEN_FRAME] = 512;
2240 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2243 int_mv *second_ref_mv,
2244 int64_t comp_pred_diff[REFERENCE_MODES],
2245 const int64_t tx_size_diff[TX_MODES],
2246 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2247 MACROBLOCKD *const xd = &x->e_mbd;
2249 // Take a snapshot of the coding context so it can be
2250 // restored if we decide to encode this way
2251 ctx->skip = x->skip;
2252 ctx->best_mode_index = mode_index;
2253 ctx->mic = *xd->mi[0];
2255 ctx->best_ref_mv[0].as_int = ref_mv->as_int;
2256 ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
2258 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2259 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2260 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2262 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2263 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2264 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2267 static void setup_pred_block(const MACROBLOCKD *xd,
2268 struct buf_2d dst[MAX_MB_PLANE],
2269 const YV12_BUFFER_CONFIG *src,
2270 int mi_row, int mi_col,
2271 const struct scale_factors *scale,
2272 const struct scale_factors *scale_uv) {
2275 dst[0].buf = src->y_buffer;
2276 dst[0].stride = src->y_stride;
2277 dst[1].buf = src->u_buffer;
2278 dst[2].buf = src->v_buffer;
2279 dst[1].stride = dst[2].stride = src->uv_stride;
2281 dst[3].buf = src->alpha_buffer;
2282 dst[3].stride = src->alpha_stride;
2285 // TODO(jkoleszar): Make scale factors per-plane data
2286 for (i = 0; i < MAX_MB_PLANE; i++) {
2287 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2288 i ? scale_uv : scale,
2289 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2293 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2294 const TileInfo *const tile,
2295 MV_REFERENCE_FRAME ref_frame,
2296 BLOCK_SIZE block_size,
2297 int mi_row, int mi_col,
2298 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2299 int_mv frame_near_mv[MAX_REF_FRAMES],
2300 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2301 const VP9_COMMON *cm = &cpi->common;
2302 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2303 MACROBLOCKD *const xd = &x->e_mbd;
2304 MODE_INFO *const mi = xd->mi[0];
2305 int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
2306 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2308 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2309 // use the UV scaling factors.
2310 setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2312 // Gets an initial list of candidate vectors from neighbours and orders them
2313 vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
2315 // Candidate refinement carried out at encoder and decoder
2316 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2317 &frame_nearest_mv[ref_frame],
2318 &frame_near_mv[ref_frame]);
2320 // Further refinement that is encode side only to test the top few candidates
2321 // in full and choose the best as the centre point for subsequent searches.
2322 // The current implementation doesn't support scaling.
2323 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2324 mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
2325 ref_frame, block_size);
2328 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
2330 const VP9_COMMON *const cm = &cpi->common;
2331 const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
2332 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
2333 return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
2336 int vp9_get_switchable_rate(const VP9_COMP *cpi) {
2337 const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2338 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2339 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2340 return SWITCHABLE_INTERP_RATE_FACTOR *
2341 cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
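// In other words: the bit cost of signaling this block's interpolation
// filter given its spatial context, scaled by
// SWITCHABLE_INTERP_RATE_FACTOR. The filter search below adds this value
// (as rs / rs_rd) on top of the modeled prediction cost when comparing
// filter candidates.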
2344 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2346 int mi_row, int mi_col,
2347 int_mv *tmp_mv, int *rate_mv) {
2348 MACROBLOCKD *xd = &x->e_mbd;
2349 const VP9_COMMON *cm = &cpi->common;
2350 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2351 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2352 int bestsme = INT_MAX;
2353 int further_steps, step_param;
2354 int sadpb = x->sadperbit16;
2356 int ref = mbmi->ref_frame[0];
2357 MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
2359 int tmp_col_min = x->mv_col_min;
2360 int tmp_col_max = x->mv_col_max;
2361 int tmp_row_min = x->mv_row_min;
2362 int tmp_row_max = x->mv_row_max;
2364 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
2368 pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
2369 pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
2370 pred_mv[2] = x->pred_mv[ref].as_mv;
2372 if (scaled_ref_frame) {
2374 // Swap out the reference frame for a version that's been scaled to
2375 // match the resolution of the current frame, allowing the existing
2376 // motion search code to be used without additional modifications.
2377 for (i = 0; i < MAX_MB_PLANE; i++)
2378 backup_yv12[i] = xd->plane[i].pre[0];
2380 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2383 vp9_set_mv_search_range(x, &ref_mv);
2385 // Work out the size of the first step in the mv step search.
2386 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2387 if (cpi->sf.auto_mv_step_size && cm->show_frame) {
2388 // Take a weighted average of the step_params based on the last frame's
2389 // max mv magnitude and that based on the best ref mvs of the current
2390 // block for the given reference.
2391 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2392 cpi->mv_step_param) >> 1;
2394 step_param = cpi->mv_step_param;
2397 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2399 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2400 b_width_log2(bsize)));
2401 step_param = MAX(step_param, boffset);
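// boffset grows as the block shrinks relative to 64x64, so smaller
// partitions start the diamond search with a coarser first step. This is
// a speed heuristic that leans on the MV predictor being reasonably good
// for small blocks rather than on a wide full-pel search.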
2404 if (cpi->sf.adaptive_motion_search) {
2405 int bwl = b_width_log2_lookup[bsize];
2406 int bhl = b_height_log2_lookup[bsize];
2408 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
2413 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
2414 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2415 x->pred_mv[ref].as_int = 0;
2416 tmp_mv->as_int = INVALID_MV;
2418 if (scaled_ref_frame) {
2420 for (i = 0; i < MAX_MB_PLANE; i++)
2421 xd->plane[i].pre[0] = backup_yv12[i];
2428 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
2433 // Further step/diamond searches as necessary
2434 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2436 if (cpi->sf.search_method == FAST_DIAMOND) {
2437 bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
2438 &cpi->fn_ptr[bsize], 1,
2439 &ref_mv, &tmp_mv->as_mv);
2440 if (bestsme < INT_MAX)
2441 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2442 &cpi->fn_ptr[bsize], 1);
2443 } else if (cpi->sf.search_method == FAST_HEX) {
2444 bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
2445 &cpi->fn_ptr[bsize], 1,
2446 &ref_mv, &tmp_mv->as_mv);
2447 if (bestsme < INT_MAX)
2448 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2449 &cpi->fn_ptr[bsize], 1);
2450 } else if (cpi->sf.search_method == HEX) {
2451 bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
2452 &cpi->fn_ptr[bsize], 1,
2453 &ref_mv, &tmp_mv->as_mv);
2454 if (bestsme < INT_MAX)
2455 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2456 &cpi->fn_ptr[bsize], 1);
2457 } else if (cpi->sf.search_method == SQUARE) {
2458 bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
2459 &cpi->fn_ptr[bsize], 1,
2460 &ref_mv, &tmp_mv->as_mv);
2461 if (bestsme < INT_MAX)
2462 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2463 &cpi->fn_ptr[bsize], 1);
2464 } else if (cpi->sf.search_method == BIGDIA) {
2465 bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
2466 &cpi->fn_ptr[bsize], 1,
2467 &ref_mv, &tmp_mv->as_mv);
2468 if (bestsme < INT_MAX)
2469 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2470 &cpi->fn_ptr[bsize], 1);
2472 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2473 sadpb, further_steps, 1,
2474 &cpi->fn_ptr[bsize],
2475 &ref_mv, &tmp_mv->as_mv);
2478 x->mv_col_min = tmp_col_min;
2479 x->mv_col_max = tmp_col_max;
2480 x->mv_row_min = tmp_row_min;
2481 x->mv_row_max = tmp_row_max;
2483 if (bestsme < INT_MAX) {
2484 int dis; /* TODO: use dis in distortion calculation later. */
2485 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
2486 cm->allow_high_precision_mv,
2488 &cpi->fn_ptr[bsize],
2489 cpi->sf.subpel_force_stop,
2490 cpi->sf.subpel_iters_per_step,
2491 x->nmvjointcost, x->mvcost,
2492 &dis, &x->pred_sse[ref]);
2494 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
2495 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2497 if (cpi->sf.adaptive_motion_search && cm->show_frame)
2498 x->pred_mv[ref].as_int = tmp_mv->as_int;
2500 if (scaled_ref_frame) {
2502 for (i = 0; i < MAX_MB_PLANE; i++)
2503 xd->plane[i].pre[0] = backup_yv12[i];
2507 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2510 int mi_row, int mi_col,
2511 int_mv single_newmv[MAX_REF_FRAMES],
2513 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2514 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2515 MACROBLOCKD *xd = &x->e_mbd;
2516 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2517 const int refs[2] = { mbmi->ref_frame[0],
2518 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2521 // Prediction buffer from second frame.
2522 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2523 const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2525 // Do joint motion search in compound mode to get more accurate mv.
2526 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2527 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2528 int last_besterr[2] = {INT_MAX, INT_MAX};
2529 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2530 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2531 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2534 for (ref = 0; ref < 2; ++ref) {
2535 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2537 if (scaled_ref_frame[ref]) {
2539 // Swap out the reference frame for a version that's been scaled to
2540 // match the resolution of the current frame, allowing the existing
2541 // motion search code to be used without additional modifications.
2542 for (i = 0; i < MAX_MB_PLANE; i++)
2543 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2544 vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
2548 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2551 // Allow the joint search to run multiple times iteratively for each ref
2552 // frame, and break out of the search loop if it can't find a better mv.
2553 for (ite = 0; ite < 4; ite++) {
2554 struct buf_2d ref_yv12[2];
2555 int bestsme = INT_MAX;
2556 int sadpb = x->sadperbit16;
2558 int search_range = 3;
2560 int tmp_col_min = x->mv_col_min;
2561 int tmp_col_max = x->mv_col_max;
2562 int tmp_row_min = x->mv_row_min;
2563 int tmp_row_max = x->mv_row_max;
2566 // Initialized here because of compiler problem in Visual Studio.
2567 ref_yv12[0] = xd->plane[0].pre[0];
2568 ref_yv12[1] = xd->plane[0].pre[1];
2570 // Get pred block from second frame.
2571 vp9_build_inter_predictor(ref_yv12[!id].buf,
2572 ref_yv12[!id].stride,
2574 &frame_mv[refs[!id]].as_mv,
2575 &xd->block_refs[!id]->sf,
2577 kernel, MV_PRECISION_Q3,
2578 mi_col * MI_SIZE, mi_row * MI_SIZE);
2580 // Compound motion search on first ref frame.
2582 xd->plane[0].pre[0] = ref_yv12[id];
2583 vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
2585 // Use mv result from single mode as mvp.
2586 tmp_mv = frame_mv[refs[id]].as_mv;
2591 // Small-range full-pixel motion search
2592 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2594 &cpi->fn_ptr[bsize],
2595 &ref_mv[id].as_mv, second_pred,
2597 if (bestsme < INT_MAX)
2598 bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
2599 second_pred, &cpi->fn_ptr[bsize], 1);
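// Each iteration holds one reference's MV fixed: its prediction was built
// into second_pred above, and the refining search then moves the other
// reference's MV so that the averaged (compound) predictor, rather than a
// single-reference predictor, best matches the source block.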
2601 x->mv_col_min = tmp_col_min;
2602 x->mv_col_max = tmp_col_max;
2603 x->mv_row_min = tmp_row_min;
2604 x->mv_row_max = tmp_row_max;
2606 if (bestsme < INT_MAX) {
2607 int dis; /* TODO: use dis in distortion calculation later. */
2609 bestsme = cpi->find_fractional_mv_step_comp(
2612 cpi->common.allow_high_precision_mv,
2614 &cpi->fn_ptr[bsize],
2615 0, cpi->sf.subpel_iters_per_step,
2616 x->nmvjointcost, x->mvcost,
2617 &dis, &sse, second_pred,
2622 xd->plane[0].pre[0] = scaled_first_yv12;
2624 if (bestsme < last_besterr[id]) {
2625 frame_mv[refs[id]].as_mv = tmp_mv;
2626 last_besterr[id] = bestsme;
2634 for (ref = 0; ref < 2; ++ref) {
2635 if (scaled_ref_frame[ref]) {
2636 // restore the predictor
2638 for (i = 0; i < MAX_MB_PLANE; i++)
2639 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2642 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2643 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2644 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2647 vpx_free(second_pred);
2650 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2651 uint8_t *orig_dst[MAX_MB_PLANE],
2652 int orig_dst_stride[MAX_MB_PLANE]) {
2654 for (i = 0; i < MAX_MB_PLANE; i++) {
2655 xd->plane[i].dst.buf = orig_dst[i];
2656 xd->plane[i].dst.stride = orig_dst_stride[i];
2660 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2662 int64_t txfm_cache[],
2663 int *rate2, int64_t *distortion,
2665 int *rate_y, int64_t *distortion_y,
2666 int *rate_uv, int64_t *distortion_uv,
2667 int *mode_excluded, int *disable_skip,
2668 INTERP_FILTER *best_filter,
2669 int_mv (*mode_mv)[MAX_REF_FRAMES],
2670 int mi_row, int mi_col,
2671 int_mv single_newmv[MAX_REF_FRAMES],
2673 const int64_t ref_best_rd) {
2674 VP9_COMMON *cm = &cpi->common;
2675 RD_OPT *rd_opt = &cpi->rd;
2676 MACROBLOCKD *xd = &x->e_mbd;
2677 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2678 const int is_comp_pred = has_second_ref(mbmi);
2679 const int num_refs = is_comp_pred ? 2 : 1;
2680 const int this_mode = mbmi->mode;
2681 int_mv *frame_mv = mode_mv[this_mode];
2683 int refs[2] = { mbmi->ref_frame[0],
2684 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2686 int64_t this_rd = 0;
2687 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2688 int pred_exists = 0;
2690 int64_t rd, best_rd = INT64_MAX;
2691 int best_needs_copy = 0;
2692 uint8_t *orig_dst[MAX_MB_PLANE];
2693 int orig_dst_stride[MAX_MB_PLANE];
2697 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2698 frame_mv[refs[1]].as_int == INVALID_MV)
2702 if (this_mode == NEWMV) {
2705 // Initialize mv using single prediction mode result.
2706 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2707 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2709 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2710 joint_motion_search(cpi, x, bsize, frame_mv,
2711 mi_row, mi_col, single_newmv, &rate_mv);
2713 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2714 &mbmi->ref_mvs[refs[0]][0].as_mv,
2715 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2716 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2717 &mbmi->ref_mvs[refs[1]][0].as_mv,
2718 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2723 single_motion_search(cpi, x, bsize, mi_row, mi_col,
2725 if (tmp_mv.as_int == INVALID_MV)
2728 frame_mv[refs[0]].as_int =
2729 xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2730 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2734 for (i = 0; i < num_refs; ++i) {
2735 cur_mv[i] = frame_mv[refs[i]];
2736 // Clip "next_nearest" so that it does not extend too far out of the image.
2737 if (this_mode != NEWMV)
2738 clamp_mv2(&cur_mv[i].as_mv, xd);
2740 if (mv_check_bounds(x, &cur_mv[i].as_mv))
2742 mbmi->mv[i].as_int = cur_mv[i].as_int;
2745 // Do the first prediction into the destination buffer. Do the next
2746 // prediction into a temporary buffer. Then keep track of which one
2747 // of these currently holds the best predictor, and use the other
2748 // one for future predictions. In the end, copy from tmp_buf to
2749 // dst if necessary.
2750 for (i = 0; i < MAX_MB_PLANE; i++) {
2751 orig_dst[i] = xd->plane[i].dst.buf;
2752 orig_dst_stride[i] = xd->plane[i].dst.stride;
2755 /* We don't include the cost of the second reference here, because there
2756 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2757 * words if you present them in that order, the second one is always known
2758 * if the first is known */
2759 *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2761 if (!(*mode_excluded))
2762 *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
2763 : cm->reference_mode == COMPOUND_REFERENCE;
2766 // Are all MVs integer pel for Y and UV
2767 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2769 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2771 // Search for best switchable filter by checking the variance of
2772 // pred error irrespective of whether the filter will be used
2773 rd_opt->mask_filter = 0;
2774 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2775 rd_opt->filter_cache[i] = INT64_MAX;
2777 if (cm->interp_filter != BILINEAR) {
2778 *best_filter = EIGHTTAP;
2779 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2780 *best_filter = EIGHTTAP;
2783 int tmp_rate_sum = 0;
2784 int64_t tmp_dist_sum = 0;
2786 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2789 mbmi->interp_filter = i;
2790 rs = vp9_get_switchable_rate(cpi);
2791 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2793 if (i > 0 && intpel_mv) {
2794 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2795 rd_opt->filter_cache[i] = rd;
2796 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2797 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2798 if (cm->interp_filter == SWITCHABLE)
2800 rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2803 int64_t dist_sum = 0;
2804 if ((cm->interp_filter == SWITCHABLE &&
2805 (!i || best_needs_copy)) ||
2806 (cm->interp_filter != SWITCHABLE &&
2807 (cm->interp_filter == mbmi->interp_filter ||
2808 (i == 0 && intpel_mv)))) {
2809 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2811 for (j = 0; j < MAX_MB_PLANE; j++) {
2812 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2813 xd->plane[j].dst.stride = 64;
2816 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2817 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2819 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2820 rd_opt->filter_cache[i] = rd;
2821 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2822 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2823 if (cm->interp_filter == SWITCHABLE)
2825 rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2827 if (i == 0 && intpel_mv) {
2828 tmp_rate_sum = rate_sum;
2829 tmp_dist_sum = dist_sum;
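// With purely integer-pel MVs the interpolation filter never actually
// filters anything, so the modeled rate/distortion is identical for every
// filter; caching the i == 0 result here and reusing it above means only
// the filter signaling cost (rs_rd) differs between candidates.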
2833 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2834 if (rd / 2 > ref_best_rd) {
2835 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2839 newbest = i == 0 || rd < best_rd;
2843 *best_filter = mbmi->interp_filter;
2844 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2845 best_needs_copy = !best_needs_copy;
2848 if ((cm->interp_filter == SWITCHABLE && newbest) ||
2849 (cm->interp_filter != SWITCHABLE &&
2850 cm->interp_filter == mbmi->interp_filter)) {
2854 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2857 // Set the appropriate filter
2858 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2859 cm->interp_filter : *best_filter;
2860 rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
2863 if (best_needs_copy) {
2864 // again temporarily set the buffers to local memory to prevent a memcpy
2865 for (i = 0; i < MAX_MB_PLANE; i++) {
2866 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2867 xd->plane[i].dst.stride = 64;
2871 // Handles the special case when a filter that is not in the
2872 // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
2873 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2876 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2879 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2880 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2881 // if current pred_error modeled rd is substantially more than the best
2882 // so far, do not bother doing full rd
2883 if (rd / 2 > ref_best_rd) {
2884 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2889 if (cm->interp_filter == SWITCHABLE)
2890 *rate2 += vp9_get_switchable_rate(cpi);
2892 if (!is_comp_pred) {
2893 if (!x->in_active_map) {
2898 } else if (cpi->allow_encode_breakout && x->encode_breakout) {
2899 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2900 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2901 unsigned int var, sse;
2902 // Skipping threshold for ac.
2903 unsigned int thresh_ac;
2904 // Set a maximum for the threshold to avoid a big PSNR loss in the low-bitrate case.
2905 // Use an extremely low threshold for static frames to limit skipping.
2906 const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2907 ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2908 // The encode_breakout input
2909 const unsigned int min_thresh =
2910 MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2912 // Calculate threshold according to dequant value.
2913 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2914 thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2916 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2917 xd->plane[0].dst.buf,
2918 xd->plane[0].dst.stride, &sse);
2920 // Adjust threshold according to partition size.
2921 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2922 b_height_log2_lookup[bsize]);
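// The AC threshold is tied to the quantizer: dequant[1] is the AC step
// size, so thresh_ac is on the order of the distortion quantization would
// introduce anyway, clamped by the user's encode_breakout setting and then
// scaled down for smaller partitions. If the prediction SSE is already
// below it, coding the residual is unlikely to pay off.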
2924 // Y skipping condition checking
2925 if (sse < thresh_ac || sse == 0) {
2926 // Skipping threshold for dc
2927 unsigned int thresh_dc;
2929 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2931 // dc skipping checking
2932 if ((sse - var) < thresh_dc || sse == var) {
2933 unsigned int sse_u, sse_v;
2934 unsigned int var_u, var_v;
2936 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2937 x->plane[1].src.stride,
2938 xd->plane[1].dst.buf,
2939 xd->plane[1].dst.stride, &sse_u);
2941 // U skipping condition checking
2942 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2943 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2944 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2945 x->plane[2].src.stride,
2946 xd->plane[2].dst.buf,
2947 xd->plane[2].dst.stride, &sse_v);
2949 // V skipping condition checking
2950 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2951 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2954 // The cost of skip bit needs to be added.
2955 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2957 // Scaling factor for SSE from spatial domain to frequency domain
2958 // is 16. Adjust distortion accordingly.
2959 *distortion_uv = (sse_u + sse_v) << 4;
2960 *distortion = (sse << 4) + *distortion_uv;
2963 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2972 int skippable_y, skippable_uv;
2973 int64_t sseuv = INT64_MAX;
2974 int64_t rdcosty = INT64_MAX;
2976 // Y cost and distortion
2977 inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2978 bsize, txfm_cache, ref_best_rd);
2980 if (*rate_y == INT_MAX) {
2982 *distortion = INT64_MAX;
2983 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2988 *distortion += *distortion_y;
2990 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2991 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2993 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2994 bsize, ref_best_rd - rdcosty);
2995 if (*rate_uv == INT_MAX) {
2997 *distortion = INT64_MAX;
2998 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3004 *distortion += *distortion_uv;
3005 *skippable = skippable_y && skippable_uv;
3008 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3009 return this_rd; // if 0, this will be recalculated by the caller
3012 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3014 struct macroblock_plane *const p = x->plane;
3015 struct macroblockd_plane *const pd = x->e_mbd.plane;
3018 for (i = 0; i < max_plane; ++i) {
3019 p[i].coeff = ctx->coeff_pbuf[i][1];
3020 p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
3021 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3022 p[i].eobs = ctx->eobs_pbuf[i][1];
3024 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
3025 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
3026 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3027 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
3029 ctx->coeff_pbuf[i][0] = p[i].coeff;
3030 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
3031 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3032 ctx->eobs_pbuf[i][0] = p[i].eobs;
3036 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3037 int *returnrate, int64_t *returndist,
3039 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3040 VP9_COMMON *const cm = &cpi->common;
3041 MACROBLOCKD *const xd = &x->e_mbd;
3042 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3043 int y_skip = 0, uv_skip = 0;
3044 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3045 TX_SIZE max_uv_tx_size;
3048 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3050 if (bsize >= BLOCK_8X8) {
3051 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3052 &dist_y, &y_skip, bsize, tx_cache,
3053 best_rd) >= best_rd) {
3054 *returnrate = INT_MAX;
3057 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
3058 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3059 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
3062 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3063 &dist_y, best_rd) >= best_rd) {
3064 *returnrate = INT_MAX;
3067 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
3068 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3069 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
3072 if (y_skip && uv_skip) {
3073 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3074 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3075 *returndist = dist_y + dist_uv;
3076 vp9_zero(ctx->tx_rd_diff);
3079 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3080 *returndist = dist_y + dist_uv;
3081 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3082 for (i = 0; i < TX_MODES; i++) {
3083 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3084 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3086 ctx->tx_rd_diff[i] = 0;
3090 ctx->mic = *xd->mi[0];
3093 static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
3095 return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
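// thresh_fact is applied in 1/32 units (the >> 5): a factor of 32 leaves
// the mode's base threshold unchanged, larger factors prune the mode more
// aggressively, and a threshold of INT_MAX means the mode is always
// skipped. update_rd_thresh_fact() below shrinks the factor for whichever
// mode wins and grows it for the losers.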
3098 // Updating rd_thresh_freq_fact[] here means that the different
3099 // partition/block sizes are handled independently based on the best
3100 // choice for the current partition. It may well be better to keep a scaled
3101 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3102 // combination that wins out.
3103 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
3104 int best_mode_index) {
3105 if (cpi->sf.adaptive_rd_thresh > 0) {
3106 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
3108 for (mode = 0; mode < top_mode; ++mode) {
3109 int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
3111 if (mode == best_mode_index) {
3112 *fact -= (*fact >> 3);
3114 *fact = MIN(*fact + RD_THRESH_INC,
3115 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
3121 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3122 const TileInfo *const tile,
3123 int mi_row, int mi_col,
3125 int64_t *returndistortion,
3127 PICK_MODE_CONTEXT *ctx,
3128 int64_t best_rd_so_far) {
3129 VP9_COMMON *const cm = &cpi->common;
3130 RD_OPT *const rd_opt = &cpi->rd;
3131 MACROBLOCKD *const xd = &x->e_mbd;
3132 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3133 const struct segmentation *const seg = &cm->seg;
3134 PREDICTION_MODE this_mode;
3135 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3136 unsigned char segment_id = mbmi->segment_id;
3138 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3139 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3140 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3141 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3143 int64_t best_rd = best_rd_so_far;
3144 int64_t best_tx_rd[TX_MODES];
3145 int64_t best_tx_diff[TX_MODES];
3146 int64_t best_pred_diff[REFERENCE_MODES];
3147 int64_t best_pred_rd[REFERENCE_MODES];
3148 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3149 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3150 MB_MODE_INFO best_mbmode = { 0 };
3151 int mode_index, best_mode_index = -1;
3152 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3153 vp9_prob comp_mode_p;
3154 int64_t best_intra_rd = INT64_MAX;
3155 int64_t best_inter_rd = INT64_MAX;
3156 PREDICTION_MODE best_intra_mode = DC_PRED;
3157 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3158 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3159 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3160 int64_t dist_uv[TX_SIZES];
3161 int skip_uv[TX_SIZES];
3162 PREDICTION_MODE mode_uv[TX_SIZES];
3163 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3164 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3165 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3166 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3168 int mode_skip_mask = 0;
3169 int mode_skip_start = cpi->sf.mode_skip_start + 1;
3170 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3171 const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
3172 const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
3173 const int intra_y_mode_mask =
3174 cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3175 int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
3177 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3179 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3182 for (i = 0; i < REFERENCE_MODES; ++i)
3183 best_pred_rd[i] = INT64_MAX;
3184 for (i = 0; i < TX_MODES; i++)
3185 best_tx_rd[i] = INT64_MAX;
3186 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3187 best_filter_rd[i] = INT64_MAX;
3188 for (i = 0; i < TX_SIZES; i++)
3189 rate_uv_intra[i] = INT_MAX;
3190 for (i = 0; i < MAX_REF_FRAMES; ++i)
3191 x->pred_sse[i] = INT_MAX;
3193 *returnrate = INT_MAX;
3195 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3196 x->pred_mv_sad[ref_frame] = INT_MAX;
3197 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3198 vp9_setup_buffer_inter(cpi, x, tile,
3199 ref_frame, bsize, mi_row, mi_col,
3200 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3202 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3203 frame_mv[ZEROMV][ref_frame].as_int = 0;
3206 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3207 // All modes from vp9_mode_order that use this frame as any ref
3208 static const int ref_frame_mask_all[] = {
3209 0x0, 0x123291, 0x25c444, 0x39b722
3211 // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
3212 // this frame as their primary ref
3213 static const int ref_frame_mask_fixedmv[] = {
3214 0x0, 0x121281, 0x24c404, 0x080102
3216 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3217 // Skip modes for missing references
3218 mode_skip_mask |= ref_frame_mask_all[ref_frame];
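// mode_skip_mask is a bitmask indexed by position in vp9_mode_order;
// setting a bit removes that mode from the main mode loop (see the
// (1 << mode_index) test further down), so a missing reference knocks out
// every mode that would have used it in a single operation.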
3219 } else if (cpi->sf.reference_masking) {
3220 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3221 // Skip fixed mv modes for poor references
3222 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3223 mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
3228 // If the segment reference frame feature is enabled,
3229 // then do nothing if the current ref frame is not allowed.
3230 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3231 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3232 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3236 // If the segment skip feature is enabled,
3237 // then do nothing if the current mode is not allowed.
3238 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
3239 const int inter_non_zero_mode_mask = 0x1F7F7;
3240 mode_skip_mask |= inter_non_zero_mode_mask;
3243 // Disable this drop out case if the ref frame
3244 // segment level feature is enabled for this segment. This is to
3245 // prevent the possibility that we end up unable to pick any mode.
3246 if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3247 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3248 // unless ARNR filtering is enabled in which case we want
3249 // an unfiltered alternative. We allow near/nearest as well
3250 // because they may result in zero-zero MVs but be cheaper.
3251 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3253 ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
3254 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3255 mode_skip_mask |= (1 << THR_NEARA);
3256 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3257 mode_skip_mask |= (1 << THR_NEARESTA);
3261 // TODO(JBB): This is to make up for the fact that we don't have SAD
3262 // functions that work when the block size reads outside the UMV border. We
3263 // should fix this either by making the motion search just work on
3264 // a representative block in the boundary (first) and then implementing a
3265 // function that does SADs when inside the border.
3266 if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
3267 const int new_modes_mask =
3268 (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
3269 (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
3270 mode_skip_mask |= new_modes_mask;
3273 if (bsize > cpi->sf.max_intra_bsize) {
3274 mode_skip_mask |= 0xFF30808;
3277 if (!x->in_active_map) {
3279 assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
3280 if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
3281 mode_index = THR_NEARESTMV;
3282 else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
3283 mode_index = THR_NEARMV;
3285 mode_index = THR_ZEROMV;
3286 mode_skip_mask = ~(1 << mode_index);
3287 mode_skip_start = MAX_MODES;
3288 disable_inter_mode_mask = 0;
3291 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3292 int mode_excluded = 0;
3293 int64_t this_rd = INT64_MAX;
3294 int disable_skip = 0;
3295 int compmode_cost = 0;
3296 int rate2 = 0, rate_y = 0, rate_uv = 0;
3297 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3299 int64_t tx_cache[TX_MODES];
3302 int64_t total_sse = INT64_MAX;
3305 // Look at the reference frame of the best mode so far and set the
3306 // skip mask to look at a subset of the remaining modes.
3307 if (mode_index == mode_skip_start && best_mode_index >= 0) {
3308 switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
3312 mode_skip_mask |= LAST_FRAME_MODE_MASK;
3315 mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
3318 mode_skip_mask |= ALT_REF_MODE_MASK;
3321 case MAX_REF_FRAMES:
3322 assert(0 && "Invalid Reference frame");
3325 if (mode_skip_mask & (1 << mode_index))
3328 // Test best rd so far against threshold for trying this mode.
3329 if (rd_less_than_thresh(best_rd, rd_threshes[mode_index],
3330 rd_thresh_freq_fact[mode_index]))
3333 this_mode = vp9_mode_order[mode_index].mode;
3334 ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3335 if (ref_frame != INTRA_FRAME &&
3336 disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
3338 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3340 comp_pred = second_ref_frame > INTRA_FRAME;
3342 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3343 best_mode_index >=0 &&
3344 vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3346 if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3347 ref_frame != best_inter_ref_frame &&
3348 second_ref_frame != best_inter_ref_frame)
3350 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3352 if (ref_frame != INTRA_FRAME)
3353 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3356 if (ref_frame == INTRA_FRAME) {
3357 if (!(intra_y_mode_mask & (1 << this_mode)))
3359 if (this_mode != DC_PRED) {
3360 // Disable intra modes other than DC_PRED for blocks with low variance
3361 // Threshold for intra skipping based on source variance
3362 // TODO(debargha): Specialize the threshold for super block sizes
3363 const unsigned int skip_intra_var_thresh = 64;
3364 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3365 x->source_variance < skip_intra_var_thresh)
3367 // Only search the oblique modes if the best so far is
3368 // one of the neighboring directional modes
3369 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3370 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3371 if (best_mode_index >= 0 &&
3372 vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
3375 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3376 if (conditional_skipintra(this_mode, best_intra_mode))
3381 if (x->in_active_map &&
3382 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
3383 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
3384 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
3385 disable_inter_mode_mask, this_mode, ref_frames))
3390 mbmi->mode = this_mode;
3391 mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
3392 mbmi->ref_frame[0] = ref_frame;
3393 mbmi->ref_frame[1] = second_ref_frame;
3394 // Evaluate all sub-pel filters irrespective of whether we can use
3395 // them for this frame.
3396 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3397 : cm->interp_filter;
3399 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3401 // Select prediction reference frames.
3402 for (i = 0; i < MAX_MB_PLANE; i++) {
3403 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3405 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3408 for (i = 0; i < TX_MODES; ++i)
3409 tx_cache[i] = INT64_MAX;
3411 #ifdef MODE_TEST_HIT_STATS
3413 // Keep a record of the number of test hits at each size.
3414 cpi->mode_test_hits[bsize]++;
3417 if (ref_frame == INTRA_FRAME) {
3419 intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3420 bsize, tx_cache, best_rd);
3422 if (rate_y == INT_MAX)
3425 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3426 if (rate_uv_intra[uv_tx] == INT_MAX) {
3427 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3428 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3429 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3432 rate_uv = rate_uv_tokenonly[uv_tx];
3433 distortion_uv = dist_uv[uv_tx];
3434 skippable = skippable && skip_uv[uv_tx];
3435 mbmi->uv_mode = mode_uv[uv_tx];
3437 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3438 if (this_mode != DC_PRED && this_mode != TM_PRED)
3439 rate2 += intra_cost_penalty;
3440 distortion2 = distortion_y + distortion_uv;
3442 this_rd = handle_inter_mode(cpi, x, bsize,
3444 &rate2, &distortion2, &skippable,
3445 &rate_y, &distortion_y,
3446 &rate_uv, &distortion_uv,
3447 &mode_excluded, &disable_skip,
3448 &tmp_best_filter, frame_mv,
3450 single_newmv, &total_sse, best_rd);
3451 if (this_rd == INT64_MAX)
3454 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3456 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3457 rate2 += compmode_cost;
3460 // Estimate the reference frame signaling cost and add it
3461 // to the rolling cost variable.
3463 rate2 += ref_costs_comp[ref_frame];
3465 rate2 += ref_costs_single[ref_frame];
3468 if (!disable_skip) {
3469 // Test for the condition where the skip block will be activated
3470 // because there are no non-zero coefficients, and make any
3471 // necessary adjustment for rate. Ignore if skip is coded at the
3472 // segment level as the cost won't have been added in.
3473 // Is MB-level skip allowed (i.e. not coded at segment level).
3474 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3478 // Back out the coefficient coding costs
3479 rate2 -= (rate_y + rate_uv);
3480 // for best yrd calculation
3483 if (mb_skip_allowed) {
3486 // Cost the skip mb case
3487 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3489 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3490 rate2 += prob_skip_cost;
3493 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3494 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3495 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3496 // Add in the cost of the no skip flag.
3497 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3499 // FIXME(rbultje) make this work for splitmv also
3500 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3501 distortion2 = total_sse;
3502 assert(total_sse >= 0);
3503 rate2 -= (rate_y + rate_uv);
3508 } else if (mb_skip_allowed) {
3509 // Add in the cost of the no skip flag.
3510 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3513 // Calculate the final RD estimate for this mode.
3514 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
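// RDCOST() folds rate and distortion into a single Lagrangian-style cost,
// roughly distortion + lambda * rate with lambda carried in x->rdmult and
// the fixed-point scaling handled inside the macro, so the mode decisions
// below reduce to comparing these scalar costs.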
3517 if (ref_frame == INTRA_FRAME) {
3518 // Keep record of best intra rd
3519 if (this_rd < best_intra_rd) {
3520 best_intra_rd = this_rd;
3521 best_intra_mode = mbmi->mode;
3524 // Keep record of best inter rd with single reference
3525 if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) {
3526 best_inter_rd = this_rd;
3527 best_inter_ref_frame = ref_frame;
3531 if (!disable_skip && ref_frame == INTRA_FRAME) {
3532 for (i = 0; i < REFERENCE_MODES; ++i)
3533 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3534 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3535 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3538 // Store the respective mode distortions for later use.
3539 if (mode_distortions[this_mode] == -1
3540 || distortion2 < mode_distortions[this_mode]) {
3541 mode_distortions[this_mode] = distortion2;
3544 // Did this mode help, i.e. is it the new best mode?
3545 if (this_rd < best_rd || x->skip) {
3546 int max_plane = MAX_MB_PLANE;
3547 if (!mode_excluded) {
3548 // Note index of best mode so far
3549 best_mode_index = mode_index;
3551 if (ref_frame == INTRA_FRAME) {
3552 /* required for left and above block mv */
3553 mbmi->mv[0].as_int = 0;
3557 *returnrate = rate2;
3558 *returndistortion = distortion2;
3560 best_mbmode = *mbmi;
3561 best_skip2 = this_skip2;
3562 if (!x->select_txfm_size)
3563 swap_block_ptr(x, ctx, max_plane);
3564 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3565 sizeof(uint8_t) * ctx->num_4x4_blk);
3567 // TODO(debargha): enhance this test with a better distortion prediction
3568 // based on qp, activity mask and history
3569 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3570 (mode_index > MIN_EARLY_TERM_INDEX)) {
3571 const int qstep = xd->plane[0].dequant[1];
3572 // TODO(debargha): Enhance this by specializing for each mode_index
3574 if (x->source_variance < UINT_MAX) {
3575 const int var_adjust = (x->source_variance < 16);
3576 scale -= var_adjust;
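// The early-termination test below compares the best distortion so far
// (times a small scale factor) against the square of the AC quantizer
// step: once the residual error is down at the level quantization would
// introduce anyway (with the bar lowered slightly for very flat sources),
// trying further modes is unlikely to help.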
3578 if (ref_frame > INTRA_FRAME &&
3579 distortion2 * scale < qstep * qstep) {
3586 /* keep record of best compound/single-only prediction */
3587 if (!disable_skip && ref_frame != INTRA_FRAME) {
3588 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3590 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3591 single_rate = rate2 - compmode_cost;
3592 hybrid_rate = rate2;
3594 single_rate = rate2;
3595 hybrid_rate = rate2 + compmode_cost;
3598 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3599 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3602 if (single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3603 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3606 if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3607 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3610 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3611 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3613 /* keep record of best filter type */
3614 if (!mode_excluded && cm->interp_filter != BILINEAR) {
3615 int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3616 SWITCHABLE_FILTERS : cm->interp_filter];
3618 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3620 if (ref == INT64_MAX)
3622 else if (rd_opt->filter_cache[i] == INT64_MAX)
3623 // When early termination is triggered, the encoder does not have
3624 // access to the rate-distortion cost. It only knows that the cost
3625 // should be above the maximum valid value, hence it takes the known
3626 // maximum plus an arbitrary constant as the rate-distortion cost.
3627 adj_rd = rd_opt->mask_filter - ref + 10;
3629 adj_rd = rd_opt->filter_cache[i] - ref;
3632 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3637 /* keep record of best txfm size */
3638 if (bsize < BLOCK_32X32) {
3639 if (bsize < BLOCK_16X16)
3640 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3642 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3644 if (!mode_excluded && this_rd != INT64_MAX) {
3645 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3646 int64_t adj_rd = INT64_MAX;
3647 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3649 if (adj_rd < best_tx_rd[i])
3650 best_tx_rd[i] = adj_rd;
3657 if (x->skip && !comp_pred)
3661 if (best_mode_index < 0 || best_rd >= best_rd_so_far)
3664 // If we used an estimate for the uv intra rd in the loop above...
3665 if (cpi->sf.use_uv_intra_rd_estimate) {
3666 // Do Intra UV best rd mode selection if best mode choice above was intra.
3667 if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
3669 *mbmi = best_mbmode;
3670 uv_tx_size = get_uv_tx_size(mbmi);
3671 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3672 &rate_uv_tokenonly[uv_tx_size],
3673 &dist_uv[uv_tx_size],
3674 &skip_uv[uv_tx_size],
3675 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3680 assert((cm->interp_filter == SWITCHABLE) ||
3681 (cm->interp_filter == best_mbmode.interp_filter) ||
3682 !is_inter_block(&best_mbmode));
3684 update_rd_thresh_fact(cpi, bsize, best_mode_index);
3687 *mbmi = best_mbmode;
3688 x->skip |= best_skip2;
3690 for (i = 0; i < REFERENCE_MODES; ++i) {
3691 if (best_pred_rd[i] == INT64_MAX)
3692 best_pred_diff[i] = INT_MIN;
3694 best_pred_diff[i] = best_rd - best_pred_rd[i];
3698 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3699 if (best_filter_rd[i] == INT64_MAX)
3700 best_filter_diff[i] = 0;
3702 best_filter_diff[i] = best_rd - best_filter_rd[i];
3704 if (cm->interp_filter == SWITCHABLE)
3705 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3706 for (i = 0; i < TX_MODES; i++) {
3707 if (best_tx_rd[i] == INT64_MAX)
3708 best_tx_diff[i] = 0;
3710 best_tx_diff[i] = best_rd - best_tx_rd[i];
3713 vp9_zero(best_filter_diff);
3714 vp9_zero(best_tx_diff);
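// In either case, the *_diff arrays record the gap between the overall best
// rd and the best rd achieved under each reference mode, interpolation
// filter context and transform mode; they are handed to
// store_coding_context() below.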
3717 if (!x->in_active_map) {
3718 assert(mbmi->ref_frame[0] == LAST_FRAME);
3719 assert(mbmi->ref_frame[1] == NONE);
3720 assert(mbmi->mode == NEARESTMV ||
3721 mbmi->mode == NEARMV ||
3722 mbmi->mode == ZEROMV);
3723 assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
3724 assert(mbmi->mode == mbmi->uv_mode);
3727 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3728 store_coding_context(x, ctx, best_mode_index,
3729 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3730 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3731 mbmi->ref_frame[1]][0],
3732 best_pred_diff, best_tx_diff, best_filter_diff);
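/* The loops above repeatedly fold a (rate, distortion) pair into a single
 * cost via the RDCOST() macro before comparing candidates. The helper below
 * is an illustrative sketch of that weighting only -- the function name and
 * the exact constants (the >> 8 rate scaling and the << 3 distortion
 * scaling) are assumptions, not the encoder's actual definition. */
static int64_t rd_cost_sketch(int rdmult, int rate, int64_t dist) {
  // Rate (in bit-cost units) is weighted by the lambda-style multiplier;
  // distortion (sum of squared error) is added after a fixed scaling so the
  // two terms live on a comparable scale.
  return (((int64_t)rate * rdmult + 128) >> 8) + (dist << 3);
}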
3738 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3739 const TileInfo *const tile,
3740 int mi_row, int mi_col,
3742 int64_t *returndistortion,
3744 PICK_MODE_CONTEXT *ctx,
3745 int64_t best_rd_so_far) {
3746 VP9_COMMON *const cm = &cpi->common;
3747 RD_OPT *const rd_opt = &cpi->rd;
3748 MACROBLOCKD *const xd = &x->e_mbd;
3749 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3750 const struct segmentation *const seg = &cm->seg;
3751 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3752 unsigned char segment_id = mbmi->segment_id;
3754 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3755 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3756 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3758 int64_t best_rd = best_rd_so_far;
3759 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3760 static const int64_t best_tx_diff[TX_MODES] = { 0 };
3761 int64_t best_pred_diff[REFERENCE_MODES];
3762 int64_t best_pred_rd[REFERENCE_MODES];
3763 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3764 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3765 MB_MODE_INFO best_mbmode = { 0 };
3766 int ref_index, best_ref_index = 0;
3767 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3768 vp9_prob comp_mode_p;
3769 int64_t best_inter_rd = INT64_MAX;
3770 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3771 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3772 int rate_uv_intra, rate_uv_tokenonly;
3775 PREDICTION_MODE mode_uv = DC_PRED;
3776 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3777 int_mv seg_mvs[4][MAX_REF_FRAMES];
3778 b_mode_info best_bmodes[4];
3780 int ref_frame_mask = 0;
3781 int mode_skip_mask = 0;
3783 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3784 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3786 for (i = 0; i < 4; i++) {
3788 for (j = 0; j < MAX_REF_FRAMES; j++)
3789 seg_mvs[i][j].as_int = INVALID_MV;
3792 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3795 for (i = 0; i < REFERENCE_MODES; ++i)
3796 best_pred_rd[i] = INT64_MAX;
3797 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3798 best_filter_rd[i] = INT64_MAX;
3799 rate_uv_intra = INT_MAX;
3801 *returnrate = INT_MAX;
3803 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3804 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3805 vp9_setup_buffer_inter(cpi, x, tile,
3806 ref_frame, bsize, mi_row, mi_col,
3807 frame_mv[NEARESTMV], frame_mv[NEARMV],
3810 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3811 frame_mv[ZEROMV][ref_frame].as_int = 0;
3814 for (ref_frame = LAST_FRAME;
3815 ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
3817 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3818 if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
3819 ref_frame_mask |= (1 << ref_frame);
3825 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3826 int mode_excluded = 0;
3827 int64_t this_rd = INT64_MAX;
3828 int disable_skip = 0;
3829 int compmode_cost = 0;
3830 int rate2 = 0, rate_y = 0, rate_uv = 0;
3831 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3835 int64_t total_sse = INT_MAX;
3838 ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3839 second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3841 // Look at the reference frame of the best mode so far and set the
3842 // skip mask to look at a subset of the remaining modes.
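// Each bit of mode_skip_mask corresponds to an entry of vp9_ref_order[];
// a set bit means that reference-frame combination is skipped for the
// remainder of the search.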
3843 if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3844 if (ref_index == 3) {
3845 switch (vp9_ref_order[best_ref_index].ref_frame[0]) {
3850 mode_skip_mask = 0x0010;
3853 mode_skip_mask = 0x0008;
3856 mode_skip_mask = 0x0000;
3859 case MAX_REF_FRAMES:
3860 assert(0 && "Invalid Reference frame");
3863 if (mode_skip_mask & (1 << ref_index))
3867 // Test best rd so far against threshold for trying this mode.
3868 if (rd_less_than_thresh(best_rd,
3869 rd_opt->threshes[segment_id][bsize][ref_index],
3870 rd_opt->thresh_freq_fact[bsize][ref_index]))
3873 if (ref_frame > INTRA_FRAME &&
3874 !(cpi->ref_frame_flags & flag_list[ref_frame])) {
3878 comp_pred = second_ref_frame > INTRA_FRAME;
3880 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3882 // Do not allow compound prediction if the segment level reference frame
3883 // feature is in use as in this case there can only be one reference.
3884 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3886 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3887 vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME)
3889 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3890 ref_frame != best_inter_ref_frame &&
3891 second_ref_frame != best_inter_ref_frame)
3895 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3897 if (ref_frame > INTRA_FRAME &&
3898 vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3901 if (second_ref_frame > INTRA_FRAME &&
3902 vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3906 mode_excluded = mode_excluded ? mode_excluded
3907 : cm->reference_mode == SINGLE_REFERENCE;
3908 } else if (ref_frame != INTRA_FRAME) {
3909 mode_excluded = mode_excluded ? mode_excluded
3910 : cm->reference_mode == COMPOUND_REFERENCE;
3913 // If the segment reference frame feature is enabled, then do nothing
3914 // if the current ref frame is not allowed.
3915 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3916 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3919 // If the segment skip feature is enabled, then do nothing
3920 // if the current mode is not allowed.
3921 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3922 ref_frame != INTRA_FRAME) {
3924 // Disable this drop out case if the ref frame
3925 // segment level feature is enabled for this segment. This is to
3926 // prevent the possibility that we end up unable to pick any mode.
3927 } else if (!vp9_segfeature_active(seg, segment_id,
3928 SEG_LVL_REF_FRAME)) {
3929 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3930 // unless ARNR filtering is enabled in which case we want
3931 // an unfiltered alternative. We allow near/nearest as well
3932 // because they may result in zero-zero MVs but be cheaper.
3933 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3937 mbmi->tx_size = TX_4X4;
3938 mbmi->uv_mode = DC_PRED;
3939 mbmi->ref_frame[0] = ref_frame;
3940 mbmi->ref_frame[1] = second_ref_frame;
3941 // Evaluate all sub-pel filters irrespective of whether we can use
3942 // them for this frame.
3943 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3944 : cm->interp_filter;
3946 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3948 // Select prediction reference frames.
3949 for (i = 0; i < MAX_MB_PLANE; i++) {
3950 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3952 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3955 #ifdef MODE_TEST_HIT_STATS
3957 // Keep a record of the number of test hits at each size
3958 cpi->mode_test_hits[bsize]++;
3961 if (ref_frame == INTRA_FRAME) {
3963 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3964 &distortion_y, best_rd) >= best_rd)
3967 rate2 += intra_cost_penalty;
3968 distortion2 += distortion_y;
3970 if (rate_uv_intra == INT_MAX) {
3971 choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
3977 rate2 += rate_uv_intra;
3978 rate_uv = rate_uv_tokenonly;
3979 distortion2 += dist_uv;
3980 distortion_uv = dist_uv;
3981 mbmi->uv_mode = mode_uv;
3985 int64_t this_rd_thresh;
3986 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3987 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3988 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3989 int tmp_best_skippable = 0;
3990 int switchable_filter_index;
3991 int_mv *second_ref = comp_pred ?
3992 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3993 b_mode_info tmp_best_bmodes[16];
3994 MB_MODE_INFO tmp_best_mbmode;
3995 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3996 int pred_exists = 0;
3999 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4000 rd_opt->threshes[segment_id][bsize][THR_LAST] :
4001 rd_opt->threshes[segment_id][bsize][THR_ALTR];
4002 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4003 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4004 rd_opt->mask_filter = 0;
4005 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4006 rd_opt->filter_cache[i] = INT64_MAX;
4008 if (cm->interp_filter != BILINEAR) {
4009 tmp_best_filter = EIGHTTAP;
4010 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
4011 tmp_best_filter = EIGHTTAP;
4012 } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
4013 ctx->pred_interp_filter < SWITCHABLE) {
4014 tmp_best_filter = ctx->pred_interp_filter;
4015 } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
4016 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4017 ctx->pred_interp_filter : 0;
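// adaptive_pred_interp_filter == 1 trusts the filter predicted from the
// coding context only when it is a valid switchable filter; level 2 falls
// back to filter index 0 when no valid prediction exists.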
4019 for (switchable_filter_index = 0;
4020 switchable_filter_index < SWITCHABLE_FILTERS;
4021 ++switchable_filter_index) {
4024 mbmi->interp_filter = switchable_filter_index;
4025 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
4026 &mbmi->ref_mvs[ref_frame][0],
4027 second_ref, best_yrd, &rate,
4028 &rate_y, &distortion,
4029 &skippable, &total_sse,
4030 (int) this_rd_thresh, seg_mvs,
4031 bsi, switchable_filter_index,
4034 if (tmp_rd == INT64_MAX)
4036 rs = vp9_get_switchable_rate(cpi);
4037 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4038 rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
4039 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
4040 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
4042 if (cm->interp_filter == SWITCHABLE)
4045 rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
4047 newbest = (tmp_rd < tmp_best_rd);
4049 tmp_best_filter = mbmi->interp_filter;
4050 tmp_best_rd = tmp_rd;
4052 if ((newbest && cm->interp_filter == SWITCHABLE) ||
4053 (mbmi->interp_filter == cm->interp_filter &&
4054 cm->interp_filter != SWITCHABLE)) {
4055 tmp_best_rdu = tmp_rd;
4056 tmp_best_rate = rate;
4057 tmp_best_ratey = rate_y;
4058 tmp_best_distortion = distortion;
4059 tmp_best_sse = total_sse;
4060 tmp_best_skippable = skippable;
4061 tmp_best_mbmode = *mbmi;
4062 for (i = 0; i < 4; i++) {
4063 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
4064 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4067 if (switchable_filter_index == 0 &&
4068 cpi->sf.use_rd_breakout &&
4069 best_rd < INT64_MAX) {
4070 if (tmp_best_rdu / 2 > best_rd) {
4071 // skip searching the other filters if the first is
4072 // already substantially larger than the best so far
4073 tmp_best_filter = mbmi->interp_filter;
4074 tmp_best_rdu = INT64_MAX;
4079 } // switchable_filter_index loop
4083 if (tmp_best_rdu == INT64_MAX && pred_exists)
4086 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
4087 tmp_best_filter : cm->interp_filter);
4089 // Handles the special case when a filter that is not in the
4090 // switchable list (bilinear, 6-tap) is indicated at the frame level
4091 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
4092 &mbmi->ref_mvs[ref_frame][0],
4093 second_ref, best_yrd, &rate, &rate_y,
4094 &distortion, &skippable, &total_sse,
4095 (int) this_rd_thresh, seg_mvs, bsi, 0,
4097 if (tmp_rd == INT64_MAX)
4100 total_sse = tmp_best_sse;
4101 rate = tmp_best_rate;
4102 rate_y = tmp_best_ratey;
4103 distortion = tmp_best_distortion;
4104 skippable = tmp_best_skippable;
4105 *mbmi = tmp_best_mbmode;
4106 for (i = 0; i < 4; i++)
4107 xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
4111 distortion2 += distortion;
4113 if (cm->interp_filter == SWITCHABLE)
4114 rate2 += vp9_get_switchable_rate(cpi);
4117 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4118 : cm->reference_mode == COMPOUND_REFERENCE;
4120 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4122 tmp_best_rdu = best_rd -
4123 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4124 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
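// tmp_best_rdu is now the rd budget left for the chroma planes: the
// headroom between the best rd so far and the cheaper of coding the luma
// residual or skipping it entirely.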
4126 if (tmp_best_rdu > 0) {
4127 // If even the 'Y' rd value of split is higher than best so far
4128 // then don't bother looking at UV
4129 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4131 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4132 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4133 if (rate_uv == INT_MAX)
4136 distortion2 += distortion_uv;
4137 skippable = skippable && uv_skippable;
4138 total_sse += uv_sse;
4142 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4143 rate2 += compmode_cost;
4145 // Estimate the reference frame signaling cost and add it
4146 // to the rolling cost variable.
4147 if (second_ref_frame > INTRA_FRAME) {
4148 rate2 += ref_costs_comp[ref_frame];
4150 rate2 += ref_costs_single[ref_frame];
4153 if (!disable_skip) {
4154 // Test for the condition where skip block will be activated
4155 // because there are no non-zero coefficients and make any
4156 // necessary adjustment for rate. Ignore if skip is coded at
4157 // segment level as the cost won't have been added in.
4158 // Is MB-level skip allowed (i.e. not coded at segment level)?
4159 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4162 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4163 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4164 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4165 // Add in the cost of the no skip flag.
4166 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4168 // FIXME(rbultje) make this work for splitmv also
4169 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4170 distortion2 = total_sse;
4171 assert(total_sse >= 0);
4172 rate2 -= (rate_y + rate_uv);
4177 } else if (mb_skip_allowed) {
4178 // Add in the cost of the no skip flag.
4179 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4182 // Calculate the final RD estimate for this mode.
4183 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4186 // Keep record of best inter rd with single reference
4187 if (is_inter_block(mbmi) &&
4188 !has_second_ref(mbmi) &&
4190 this_rd < best_inter_rd) {
4191 best_inter_rd = this_rd;
4192 best_inter_ref_frame = ref_frame;
4195 if (!disable_skip && ref_frame == INTRA_FRAME) {
4196 for (i = 0; i < REFERENCE_MODES; ++i)
4197 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4198 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4199 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4202 // Did this mode help, i.e. is it the new best mode?
4203 if (this_rd < best_rd || x->skip) {
4204 if (!mode_excluded) {
4205 int max_plane = MAX_MB_PLANE;
4206 // Note index of best mode so far
4207 best_ref_index = ref_index;
4209 if (ref_frame == INTRA_FRAME) {
4210 /* required for left and above block mv */
4211 mbmi->mv[0].as_int = 0;
4215 *returnrate = rate2;
4216 *returndistortion = distortion2;
4218 best_yrd = best_rd -
4219 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4220 best_mbmode = *mbmi;
4221 best_skip2 = this_skip2;
4222 if (!x->select_txfm_size)
4223 swap_block_ptr(x, ctx, max_plane);
4224 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
4225 sizeof(uint8_t) * ctx->num_4x4_blk);
4227 for (i = 0; i < 4; i++)
4228 best_bmodes[i] = xd->mi[0]->bmi[i];
4230 // TODO(debargha): enhance this test with a better distortion prediction
4231 // based on qp, activity mask and history
4232 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4233 (ref_index > MIN_EARLY_TERM_INDEX)) {
4234 const int qstep = xd->plane[0].dequant[1];
4235 // TODO(debargha): Enhance this by specializing for each mode_index
4237 if (x->source_variance < UINT_MAX) {
4238 const int var_adjust = (x->source_variance < 16);
4239 scale -= var_adjust;
4241 if (ref_frame > INTRA_FRAME &&
4242 distortion2 * scale < qstep * qstep) {
4249 /* keep record of best compound/single-only prediction */
4250 if (!disable_skip && ref_frame != INTRA_FRAME) {
4251 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4253 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4254 single_rate = rate2 - compmode_cost;
4255 hybrid_rate = rate2;
4257 single_rate = rate2;
4258 hybrid_rate = rate2 + compmode_cost;
4261 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4262 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4264 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4265 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4266 } else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4267 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4269 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4270 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4273 /* keep record of best filter type */
4274 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4275 cm->interp_filter != BILINEAR) {
4276 int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
4277 SWITCHABLE_FILTERS : cm->interp_filter];
4279 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4280 if (ref == INT64_MAX)
4282 else if (rd_opt->filter_cache[i] == INT64_MAX)
4283 // When early termination is triggered, the encoder does not have
4284 // access to the rate-distortion cost. It only knows that the cost
4285 // must be above the maximum valid value, so it takes the known
4286 // maximum plus an arbitrary constant as the rate-distortion cost.
4287 adj_rd = rd_opt->mask_filter - ref + 10;
4289 adj_rd = rd_opt->filter_cache[i] - ref;
4292 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4299 if (x->skip && !comp_pred)
4303 if (best_rd >= best_rd_so_far)
4306 // If we used an estimate for the uv intra rd in the loop above...
4307 if (cpi->sf.use_uv_intra_rd_estimate) {
4308 // Redo the intra UV rd mode selection if the best mode chosen above was intra.
4309 if (vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME) {
4310 *mbmi = best_mbmode;
4311 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
4319 if (best_rd == INT64_MAX) {
4320 *returnrate = INT_MAX;
4321 *returndistortion = INT64_MAX;
4325 assert((cm->interp_filter == SWITCHABLE) ||
4326 (cm->interp_filter == best_mbmode.interp_filter) ||
4327 !is_inter_block(&best_mbmode));
4329 update_rd_thresh_fact(cpi, bsize, best_ref_index);
4332 *mbmi = best_mbmode;
4333 x->skip |= best_skip2;
4334 if (!is_inter_block(&best_mbmode)) {
4335 for (i = 0; i < 4; i++)
4336 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4338 for (i = 0; i < 4; ++i)
4339 vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4341 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
4342 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
4345 for (i = 0; i < REFERENCE_MODES; ++i) {
4346 if (best_pred_rd[i] == INT64_MAX)
4347 best_pred_diff[i] = INT_MIN;
4349 best_pred_diff[i] = best_rd - best_pred_rd[i];
4353 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4354 if (best_filter_rd[i] == INT64_MAX)
4355 best_filter_diff[i] = 0;
4357 best_filter_diff[i] = best_rd - best_filter_rd[i];
4359 if (cm->interp_filter == SWITCHABLE)
4360 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4362 vp9_zero(best_filter_diff);
4365 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4366 store_coding_context(x, ctx, best_ref_index,
4367 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4368 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4369 mbmi->ref_frame[1]][0],
4370 best_pred_diff, best_tx_diff, best_filter_diff);
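/* Both mode loops above decide the block-level skip flag by comparing the
 * rd cost of coding the residual against the rd cost of signalling skip and
 * accepting the full prediction error. The helper below is an illustrative
 * sketch of that comparison only -- the function name is made up and the
 * cost arithmetic is a stand-in for the RDCOST() macro, whose exact
 * constants are assumptions here. */
static int prefer_skip_sketch(int rdmult, int coef_rate, int64_t coded_dist,
                              int64_t total_sse) {
  // rd if the residual is coded: pay the coefficient rate, keep the smaller
  // reconstruction distortion.
  const int64_t rd_coded =
      (((int64_t)coef_rate * rdmult + 128) >> 8) + (coded_dist << 3);
  // rd if everything is skipped: no coefficient rate, distortion is the full
  // prediction error.
  const int64_t rd_skip = total_sse << 3;
  return rd_skip <= rd_coded;
}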
4375 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
4377 RD_OPT *const rd = &cpi->rd;
4379 // Set baseline threshold values
4380 for (i = 0; i < MAX_MODES; ++i)
4381 rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
4383 rd->thresh_mult[THR_NEARESTMV] = 0;
4384 rd->thresh_mult[THR_NEARESTG] = 0;
4385 rd->thresh_mult[THR_NEARESTA] = 0;
4387 rd->thresh_mult[THR_DC] += 1000;
4389 rd->thresh_mult[THR_NEWMV] += 1000;
4390 rd->thresh_mult[THR_NEWA] += 1000;
4391 rd->thresh_mult[THR_NEWG] += 1000;
4393 rd->thresh_mult[THR_NEARMV] += 1000;
4394 rd->thresh_mult[THR_NEARA] += 1000;
4395 rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
4396 rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
4398 rd->thresh_mult[THR_TM] += 1000;
4400 rd->thresh_mult[THR_COMP_NEARLA] += 1500;
4401 rd->thresh_mult[THR_COMP_NEWLA] += 2000;
4402 rd->thresh_mult[THR_NEARG] += 1000;
4403 rd->thresh_mult[THR_COMP_NEARGA] += 1500;
4404 rd->thresh_mult[THR_COMP_NEWGA] += 2000;
4406 rd->thresh_mult[THR_ZEROMV] += 2000;
4407 rd->thresh_mult[THR_ZEROG] += 2000;
4408 rd->thresh_mult[THR_ZEROA] += 2000;
4409 rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
4410 rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
4412 rd->thresh_mult[THR_H_PRED] += 2000;
4413 rd->thresh_mult[THR_V_PRED] += 2000;
4414 rd->thresh_mult[THR_D45_PRED ] += 2500;
4415 rd->thresh_mult[THR_D135_PRED] += 2500;
4416 rd->thresh_mult[THR_D117_PRED] += 2500;
4417 rd->thresh_mult[THR_D153_PRED] += 2500;
4418 rd->thresh_mult[THR_D207_PRED] += 2500;
4419 rd->thresh_mult[THR_D63_PRED] += 2500;
4421 /* Disable frame modes if the corresponding flags are not set */
4422 if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
4423 rd->thresh_mult[THR_NEWMV ] = INT_MAX;
4424 rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
4425 rd->thresh_mult[THR_ZEROMV ] = INT_MAX;
4426 rd->thresh_mult[THR_NEARMV ] = INT_MAX;
4428 if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
4429 rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
4430 rd->thresh_mult[THR_ZEROG ] = INT_MAX;
4431 rd->thresh_mult[THR_NEARG ] = INT_MAX;
4432 rd->thresh_mult[THR_NEWG ] = INT_MAX;
4434 if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
4435 rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
4436 rd->thresh_mult[THR_ZEROA ] = INT_MAX;
4437 rd->thresh_mult[THR_NEARA ] = INT_MAX;
4438 rd->thresh_mult[THR_NEWA ] = INT_MAX;
4441 if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
4442 (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
4443 rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
4444 rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
4445 rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
4446 rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
4448 if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
4449 (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
4450 rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
4451 rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
4452 rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
4453 rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
4457 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
4458 const SPEED_FEATURES *const sf = &cpi->sf;
4459 RD_OPT *const rd = &cpi->rd;
4462 for (i = 0; i < MAX_REFS; ++i)
4463 rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
4465 rd->thresh_mult_sub8x8[THR_LAST] += 2500;
4466 rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
4467 rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
4468 rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
4469 rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
4470 rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
4472 // Check for masked out split cases.
4473 for (i = 0; i < MAX_REFS; i++)
4474 if (sf->disable_split_mask & (1 << i))
4475 rd->thresh_mult_sub8x8[i] = INT_MAX;
4477 // Disable mode tests if the corresponding frame flag is not set
4478 if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
4479 rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
4480 if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
4481 rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
4482 if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
4483 rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
4484 if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
4485 (VP9_LAST_FLAG | VP9_ALT_FLAG))
4486 rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
4487 if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
4488 (VP9_GOLD_FLAG | VP9_ALT_FLAG))
4489 rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
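/* The multipliers configured above are consumed by the
 * rd_less_than_thresh() style checks in the mode loops: a mode is pruned
 * when the best rd found so far is already below its frequency-adjusted
 * threshold. The sketch below is illustrative only -- the helper name, the
 * INT_MAX handling and the >> 5 scaling are assumptions about how such a
 * gate is typically wired up. */
static int prune_mode_sketch(int64_t best_rd, int thresh, int freq_fact) {
  // A threshold of INT_MAX marks the mode as disabled outright (e.g. when
  // the corresponding reference-frame flag is not set above).
  if (thresh == INT_MAX) return 1;
  // Otherwise prune when the best candidate so far already undercuts the
  // frequency-scaled threshold for this mode.
  return best_rd < (((int64_t)thresh * freq_fact) >> 5);
}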