2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
15 #include "./vp10_rtcd.h"
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
23 #include "vp10/common/common.h"
24 #include "vp10/common/entropy.h"
25 #include "vp10/common/entropymode.h"
26 #include "vp10/common/mvref_common.h"
27 #include "vp10/common/pred_common.h"
28 #include "vp10/common/quant_common.h"
29 #include "vp10/common/reconinter.h"
30 #include "vp10/common/reconintra.h"
31 #include "vp10/common/seg_common.h"
33 #include "vp10/encoder/cost.h"
34 #include "vp10/encoder/encodemb.h"
35 #include "vp10/encoder/encodemv.h"
36 #include "vp10/encoder/encoder.h"
37 #include "vp10/encoder/mcomp.h"
38 #include "vp10/encoder/quantize.h"
39 #include "vp10/encoder/ratectrl.h"
40 #include "vp10/encoder/rd.h"
41 #include "vp10/encoder/tokenize.h"
// Exponent applied to the quantizer step when deriving mode-pruning
// threshold factors (see compute_rd_thresh_factor()).
#define RD_THRESH_POW 1.25
// Ratio between the RD multiplier and the per-bit error cost used by
// motion estimation (see vp10_initialize_rd_consts()).
#define RD_MULT_EPB_RATIO 64

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
49 void vp10_rd_cost_reset(RD_COST *rd_cost) {
50 rd_cost->rate = INT_MAX;
51 rd_cost->dist = INT64_MAX;
52 rd_cost->rdcost = INT64_MAX;
55 void vp10_rd_cost_init(RD_COST *rd_cost) {
61 // The baseline rd thresholds for breaking out of the rd loop for
62 // certain modes are assumed to be based on 8x8 blocks.
63 // This table is used to correct for block size.
64 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
65 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
66 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
// Precompute bit-cost tables for mode syntax elements (intra Y/UV modes,
// switchable interpolation filters, palette sizes and colors) from the
// current frame context, so the RD search can price modes by table lookup.
// NOTE(review): loop counters i/j are used below but their declaration
// ("int i, j;"?) and several closing braces are outside the visible chunk.
static void fill_mode_costs(VP10_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;

  // Key-frame intra Y mode costs, conditioned on the two neighbor modes.
  for (i = 0; i < INTRA_MODES; ++i)
    for (j = 0; j < INTRA_MODES; ++j)
      vp10_cost_tokens(cpi->y_mode_costs[i][j], vp10_kf_y_mode_prob[i][j],
                       vp10_intra_mode_tree);

  // Inter-frame Y mode costs and UV mode costs for key and inter frames.
  vp10_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp10_intra_mode_tree);
  vp10_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                   vp10_kf_uv_mode_prob[TM_PRED], vp10_intra_mode_tree);
  vp10_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                   fc->uv_mode_prob[TM_PRED], vp10_intra_mode_tree);

  // Switchable interpolation filter costs, one set per prediction context.
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    vp10_cost_tokens(cpi->switchable_interp_costs[i],
                     fc->switchable_interp_prob[i],
                     vp10_switchable_interp_tree);

  // Palette size costs per palette-capable block size.
  for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) {
    vp10_cost_tokens(cpi->palette_y_size_cost[i],
                     vp10_default_palette_y_size_prob[i],
                     vp10_palette_size_tree);
    vp10_cost_tokens(cpi->palette_uv_size_cost[i],
                     vp10_default_palette_uv_size_prob[i],
                     vp10_palette_size_tree);

  // Palette color-index costs per (palette size - 2, color context) pair.
  for (i = 0; i < PALETTE_MAX_SIZE - 1; ++i)
    for (j = 0; j < PALETTE_COLOR_CONTEXTS; ++j) {
      vp10_cost_tokens(cpi->palette_y_color_cost[i][j],
                       vp10_default_palette_y_color_prob[i][j],
                       vp10_palette_color_tree[i]);
      vp10_cost_tokens(cpi->palette_uv_color_cost[i][j],
                       vp10_default_palette_uv_color_prob[i][j],
                       vp10_palette_color_tree[i]);
// Build coefficient-token bit-cost tables for every transform size, plane
// type, reference type, coefficient band and context, expanding the model
// probabilities to full per-node probabilities first. The [0]/[1] planes
// of c are the with-EOB and skip-EOB cost variants.
// NOTE(review): the loop-counter declarations, the tree arguments to the
// two vp10_cost_tokens* calls, and the closing braces are outside the
// visible chunk.
static void fill_token_costs(vp10_coeff_cost *c,
                             vp10_coeff_probs_model (*p)[PLANE_TYPES]) {
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            vp10_model_to_full_probs(p[t][i][j][k][l], probs);
            vp10_cost_tokens((int *)c[t][i][j][k][0][l], probs,
            vp10_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
            // Both variants must agree on the EOB token cost.
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
// Values are now correlated to quantizer.
// Per-qindex SAD-per-bit lookup tables used by motion estimation; filled
// in by vp10_init_me_luts() below. The _8 suffix is the 8-bit-depth pair.
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
// 10- and 12-bit-depth variants of the tables above.
// NOTE(review): the matching #endif is outside the visible chunk.
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
// Fill one pair of SAD-per-bit tables as a linear function of the
// effective quantizer value for the given bit depth.
// NOTE(review): loop counter i is used below but its declaration is
// outside the visible chunk, as is the function's closing brace.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < range; i++) {
    const double q = vp10_convert_qindex_to_q(i, bit_depth);
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
    bit4lut[i] = (int)(0.063 * q + 2.742);
// Populate the SAD-per-bit lookup tables for every supported bit depth.
// NOTE(review): the final bit-depth argument of each call (presumably
// VPX_BITS_8 / VPX_BITS_10 / VPX_BITS_12), the #endif and the closing
// brace are outside the visible chunk.
void vp10_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
// Additive easing of the RD multiplier indexed by VPXMIN(15, gfu_boost/100)
// (see vp10_compute_rd_mult()): the adjustment is (rdmult * factor) >> 7,
// so 64 adds 50% at low boost, tapering to 0 at high boost.
static const int rd_boost_factor[16] = {
  64, 32, 32, 32, 24, 16, 12, 12,
  8, 8, 4, 4, 2, 2, 1, 0
};
167 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
168 128, 144, 128, 128, 144
// Compute the rate-distortion multiplier (lambda) for the given qindex.
// The base value is proportional to the square of the DC quantizer step;
// in two-pass encoding of non-key frames it is further scaled by frame
// update type and eased by the golden-frame boost.
int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
  const int64_t q = vp10_dc_quant(qindex, 0, cpi->common.bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
  // NOTE(review): the rdmult declaration, case labels and breaks of this
  // switch are outside the visible chunk; the three assignments presumably
  // correspond to 8-, 10- and 12-bit depths, with the deeper depths scaled
  // back down because q is in the higher-precision range.
  switch (cpi->common.bit_depth) {
      rdmult = 88 * q * q / 24;
      rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
      rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
  int64_t rdmult = 88 * q * q / 24;
#endif // CONFIG_VP9_HIGHBITDEPTH
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));

    // Scale lambda by frame update type, then ease it by boost (Q7 math).
    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
// Convert a quantizer index into the threshold scale factor used to prune
// the RD mode search (see set_block_thresholds()).
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
#if CONFIG_VP9_HIGHBITDEPTH
  // NOTE(review): the declaration of q and the switch/case structure are
  // outside the visible chunk; each assignment normalizes the DC quant
  // step of its bit depth back to the 8-bit range (/4, /16, /64).
  q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
  q = vp10_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
  q = vp10_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
  assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
  q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
// Copy the SAD-per-bit constants for the current quantizer index into the
// macroblock context, choosing the table matching the coded bit depth.
void vp10_initialize_me_consts(VP10_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  // NOTE(review): the case labels/breaks and the #else are outside the
  // visible chunk; the three assignment pairs correspond to the 8-, 10-
  // and 12-bit tables in order, and the trailing pair is the non-HBD path.
  switch (cpi->common.bit_depth) {
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif // CONFIG_VP9_HIGHBITDEPTH
// Fill rd->threshes[][][] with per-segment, per-block-size mode-pruning
// thresholds, scaling the baseline 8x8 factors by the segment quantizer.
static void set_block_thresholds(const VP10_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    // NOTE(review): the start of this declaration (presumably
    // "const int qindex =") is outside the visible chunk.
        clamp(vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
              cm->y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      // thresh_max guards the multiplication below against int overflow.
      const int thresh_max = INT_MAX / t;

      if (bsize >= BLOCK_8X8) {
        // 8x8 and larger: one threshold per entry of the full mode list.
        // NOTE(review): the ": INT_MAX"-style else operands and closing
        // braces are outside the visible chunk.
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult[i] < thresh_max
                  ? rd->thresh_mult[i] * t / 4
        // Sub-8x8: thresholds are tracked per reference frame instead.
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
// Per-frame initialization of the rate-distortion machinery: lambda
// (RDMULT), error-per-bit, block pruning thresholds, and the partition /
// mode / coefficient-token cost tables; MV costs for inter frames.
void vp10_initialize_rd_consts(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;

  // Clear FPU/SIMD state before mixed float/int work below.
  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp10_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
  x->errorperbit += (x->errorperbit == 0);  // Keep strictly positive.

  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  fill_token_costs(x->token_costs, cm->fc->coef_probs);

  // NOTE(review): loop counter i is used below but its declaration is
  // outside the visible chunk, as are several closing braces.
  if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
      cm->frame_type == KEY_FRAME) {
    for (i = 0; i < PARTITION_CONTEXTS; ++i)
      vp10_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                       vp10_partition_tree);

  fill_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    // Build MV component/joint cost tables at this frame's MV precision.
    // NOTE(review): the ": x->nmvcost" else operand of this conditional
    // argument is outside the visible chunk.
    vp10_build_nmv_cost_table(x->nmvjointcost,
                              cm->allow_high_precision_mv ? x->nmvcost_hp
                              &cm->fc->nmvc, cm->allow_high_precision_mv);

    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      vp10_cost_tokens((int *)cpi->inter_mode_cost[i],
                       cm->fc->inter_mode_probs[i], vp10_inter_mode_tree);
// Piecewise-linear lookup of the normalized Laplacian rate (*r_q10) and
// distortion (*d_q10) curves at the Q10 input xsq_q10 = qstep^2/variance.
// NOTE(review): the "};" terminators of the three tables below are outside
// the visible chunk.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
    4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
    3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
    2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
    2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
    1159, 1086, 1021, 963, 911, 864, 821, 781,
    745, 680, 623, 574, 530, 490, 455, 424,
    395, 345, 304, 269, 239, 213, 190, 171,
    154, 126, 104, 87, 73, 61, 52, 44,
    38, 28, 21, 16, 12, 10, 8, 6,
    5, 3, 2, 1, 1, 1, 0, 0,
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0, 0, 1, 1, 1, 2, 2, 2,
    3, 3, 4, 5, 5, 6, 7, 7,
    8, 9, 11, 12, 13, 15, 16, 17,
    18, 21, 24, 26, 29, 31, 34, 36,
    39, 44, 49, 54, 59, 64, 69, 73,
    78, 88, 97, 106, 115, 124, 133, 142,
    151, 167, 184, 200, 215, 231, 245, 260,
    274, 301, 327, 351, 375, 397, 418, 439,
    458, 495, 528, 559, 587, 613, 637, 659,
    680, 717, 749, 777, 801, 823, 842, 859,
    874, 899, 919, 936, 949, 960, 969, 977,
    983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  // Sample points (in Q10) at which the two curves above are tabulated.
  static const int xsq_iq_q10[] = {
    0, 4, 8, 12, 16, 20, 24, 28,
    32, 40, 48, 56, 64, 72, 80, 88,
    96, 112, 128, 144, 160, 176, 192, 208,
    224, 256, 288, 320, 352, 384, 416, 448,
    480, 544, 608, 672, 736, 800, 864, 928,
    992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
    2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
    4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
    8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
    16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
    32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
    65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  // Derive the table index from the 4 MSBs of (x^2/4 + 8), then linearly
  // interpolate between adjacent samples with weights a_q10/b_q10.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                   unsigned int qstep, int *rate,
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  // NOTE(review): the final parameter (presumably int64_t *dist), the
  // r_q10/d_q10 declarations, and a var == 0 special case appear to be
  // outside the visible chunk.
  static const uint32_t MAX_XSQ_Q10 = 245727;
  // Compute x^2 = qstep^2 * 2^n / var in Q10 with rounding; clamp to the
  // largest input the lookup tables cover.
  const uint64_t xsq_q10_64 =
      (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
  const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  model_rd_norm(xsq_q10, &r_q10, &d_q10);
  // Scale the normalized rate by block size (2^n_log2) and the normalized
  // distortion by the variance, with rounding.
  *rate = ((r_q10 << n_log2) + 2) >> 2;
  *dist = (var * (int64_t)d_q10 + 512) >> 10;
// Copy the above/left entropy contexts of a plane into the caller-provided
// arrays at the granularity of the given transform size: each output entry
// becomes nonzero iff any of the 4x4 contexts it covers is nonzero.
// NOTE(review): the switch over tx_size (case labels, breaks) and the
// declaration of i are outside the visible chunk; the loop pairs below
// correspond to the 4x4, 8x8, 16x16 and 32x32 arms in order.
void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                               const struct macroblockd_plane *pd,
                               ENTROPY_CONTEXT t_above[16],
                               ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  // 4x4: contexts pass through unchanged.
  memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
  memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);

  // 8x8: test 2 contexts at once via a 16-bit load.
  for (i = 0; i < num_4x4_w; i += 2)
    t_above[i] = !!*(const uint16_t *)&above[i];
  for (i = 0; i < num_4x4_h; i += 2)
    t_left[i] = !!*(const uint16_t *)&left[i];

  // 16x16: test 4 contexts at once via a 32-bit load.
  for (i = 0; i < num_4x4_w; i += 4)
    t_above[i] = !!*(const uint32_t *)&above[i];
  for (i = 0; i < num_4x4_h; i += 4)
    t_left[i] = !!*(const uint32_t *)&left[i];

  // 32x32: test 8 contexts at once via a 64-bit load.
  for (i = 0; i < num_4x4_w; i += 8)
    t_above[i] = !!*(const uint64_t *)&above[i];
  for (i = 0; i < num_4x4_h; i += 8)
    t_left[i] = !!*(const uint64_t *)&left[i];

  // Unreachable for valid transform sizes (default arm of the switch).
  assert(0 && "Invalid transform size.");
// Evaluate the candidate reference MVs for ref_frame by full-pel SAD
// against the source block, and record in the macroblock context: the
// index of the best candidate, the largest MV magnitude seen, and the
// best SAD (used elsewhere to scale motion-search effort).
// NOTE(review): several locals used below (pred_mv[], i, max_mv,
// best_index, zero_seen, fp_row, fp_col, ref_y_ptr) have declarations
// outside the visible chunk, as do some braces/continues.
void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
                  uint8_t *ref_y_buffer, int ref_y_stride,
                  int ref_frame, BLOCK_SIZE block_size) {
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  // One extra candidate (the stored pred_mv) when adaptive motion search
  // is on and the block is below the maximum partition size.
  const int num_mv_refs = MAX_MV_REF_CANDIDATES +
                          (cpi->sf.adaptive_motion_search &&
                           block_size < x->max_partition_size);

  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  // True when the NEAREST and NEAR candidates are identical, so the
  // duplicate can be skipped below.
      x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];

    if (i == 1 && near_same_nearest)
    // Round the 1/8-pel MV to full-pel coordinates.
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Only evaluate the zero vector once.
    if (fp_row ==0 && fp_col == 0 && zero_seen)
    zero_seen |= (fp_row ==0 && fp_col == 0);

    ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
// Point the dst[] 2d buffers at the Y/U/V planes of src and initialize
// each plane's prediction buffer for block position (mi_row, mi_col),
// using the luma scale factors for plane 0 and the chroma factors for
// the others.
// NOTE(review): the declaration of loop counter i and the closing braces
// are outside the visible chunk.
void vp10_setup_pred_block(const MACROBLOCKD *xd,
                           struct buf_2d dst[MAX_MB_PLANE],
                           const YV12_BUFFER_CONFIG *src,
                           int mi_row, int mi_col,
                           const struct scale_factors *scale,
                           const struct scale_factors *scale_uv) {
  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale,
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
548 int vp10_raster_block_offset(BLOCK_SIZE plane_bsize,
549 int raster_block, int stride) {
550 const int bw = b_width_log2_lookup[plane_bsize];
551 const int y = 4 * (raster_block >> bw);
552 const int x = 4 * (raster_block & ((1 << bw) - 1));
553 return y * stride + x;
556 int16_t* vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
557 int raster_block, int16_t *base) {
558 const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
559 return base + vp10_raster_block_offset(plane_bsize, raster_block, stride);
562 YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const VP10_COMP *cpi,
564 const VP10_COMMON *const cm = &cpi->common;
565 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
566 const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
568 (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
569 &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
572 int vp10_get_switchable_rate(const VP10_COMP *cpi,
573 const MACROBLOCKD *const xd) {
574 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
575 const int ctx = vp10_get_pred_context_switchable_interp(xd);
576 return SWITCHABLE_INTERP_RATE_FACTOR *
577 cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
// Set the baseline RD pruning multipliers (thresh_mult) for every mode
// searched on 8x8-and-larger blocks; larger values make a mode more
// likely to be skipped by the RD loop.
// NOTE(review): the declaration of i, the "} else {" joining the two
// adaptive_rd_thresh branches, and the closing braces are outside the
// visible chunk.
void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  // NEARESTMV modes start with a small penalty only when adaptive
  // thresholding will adjust it over time; otherwise they start at zero.
  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;

  rd->thresh_mult[THR_DC] += 1000;

  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  // Compound NEAR/NEW modes are pruned more aggressively than single-ref.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  // Intra modes: directional predictors carry the largest penalties.
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED ] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
634 void vp10_set_rd_speed_thresholds_sub8x8(VP10_COMP *cpi) {
635 static const int thresh_mult[2][MAX_REFS] =
636 {{2500, 2500, 2500, 4500, 4500, 2500},
637 {2000, 2000, 2000, 4000, 4000, 2000}};
638 RD_OPT *const rd = &cpi->rd;
639 const int idx = cpi->oxcf.mode == BEST;
640 memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
643 void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
644 int bsize, int best_mode_index) {
646 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
648 for (mode = 0; mode < top_mode; ++mode) {
649 const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
650 const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
652 for (bs = min_size; bs <= max_size; ++bs) {
653 int *const fact = &factor_buf[bs][mode];
654 if (mode == best_mode_index) {
655 *fact -= (*fact >> 4);
657 *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
// Return the rate penalty applied to intra modes during inter-frame RD
// search, proportional to the DC quantizer step for the given bit depth.
int vp10_get_intra_cost_penalty(int qindex, int qdelta,
                                vpx_bit_depth_t bit_depth) {
  const int q = vp10_dc_quant(qindex, qdelta, bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
  // NOTE(review): the switch over bit_depth and its other return
  // statements are outside the visible chunk; only one arm's return and
  // the default's assert are visible below, plus the #else path.
  return ROUND_POWER_OF_TWO(5 * q, 2);
  assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
#endif // CONFIG_VP9_HIGHBITDEPTH