/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
15 #include "./vp10_rtcd.h"
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
23 #include "vp10/common/common.h"
24 #include "vp10/common/entropy.h"
25 #include "vp10/common/entropymode.h"
26 #include "vp10/common/mvref_common.h"
27 #include "vp10/common/pred_common.h"
28 #include "vp10/common/quant_common.h"
29 #include "vp10/common/reconinter.h"
30 #include "vp10/common/reconintra.h"
31 #include "vp10/common/seg_common.h"
33 #include "vp10/encoder/cost.h"
34 #include "vp10/encoder/encodemb.h"
35 #include "vp10/encoder/encodemv.h"
36 #include "vp10/encoder/encoder.h"
37 #include "vp10/encoder/mcomp.h"
38 #include "vp10/encoder/quantize.h"
39 #include "vp10/encoder/ratectrl.h"
40 #include "vp10/encoder/rd.h"
41 #include "vp10/encoder/tokenize.h"
43 #define RD_THRESH_POW 1.25
44 #define RD_MULT_EPB_RATIO 64
46 // Factor to weigh the rate for switchable interp filters.
47 #define SWITCHABLE_INTERP_RATE_FACTOR 1
49 void vp10_rd_cost_reset(RD_COST *rd_cost) {
50 rd_cost->rate = INT_MAX;
51 rd_cost->dist = INT64_MAX;
52 rd_cost->rdcost = INT64_MAX;
55 void vp10_rd_cost_init(RD_COST *rd_cost) {
61 // The baseline rd thresholds for breaking out of the rd loop for
62 // certain modes are assumed to be based on 8x8 blocks.
63 // This table is used to correct for block size.
64 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
65 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
66 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
69 static void fill_mode_costs(VP10_COMP *cpi) {
70 const FRAME_CONTEXT *const fc = cpi->common.fc;
73 for (i = 0; i < INTRA_MODES; ++i)
74 for (j = 0; j < INTRA_MODES; ++j)
75 vp10_cost_tokens(cpi->y_mode_costs[i][j], vp10_kf_y_mode_prob[i][j],
76 vp10_intra_mode_tree);
78 vp10_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp10_intra_mode_tree);
79 for (i = 0; i < INTRA_MODES; ++i)
80 vp10_cost_tokens(cpi->intra_uv_mode_cost[i],
81 fc->uv_mode_prob[i], vp10_intra_mode_tree);
83 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
84 vp10_cost_tokens(cpi->switchable_interp_costs[i],
85 fc->switchable_interp_prob[i], vp10_switchable_interp_tree);
87 for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
88 for (j = 0; j < TX_TYPES; ++j)
89 vp10_cost_tokens(cpi->intra_tx_type_costs[i][j],
90 fc->intra_ext_tx_prob[i][j],
93 for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
94 vp10_cost_tokens(cpi->inter_tx_type_costs[i],
95 fc->inter_ext_tx_prob[i],
100 static void fill_token_costs(vp10_coeff_cost *c,
101 vp10_coeff_probs_model (*p)[PLANE_TYPES]) {
104 for (t = TX_4X4; t <= TX_32X32; ++t)
105 for (i = 0; i < PLANE_TYPES; ++i)
106 for (j = 0; j < REF_TYPES; ++j)
107 for (k = 0; k < COEF_BANDS; ++k)
108 for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
109 vpx_prob probs[ENTROPY_NODES];
110 vp10_model_to_full_probs(p[t][i][j][k][l], probs);
111 vp10_cost_tokens((int *)c[t][i][j][k][0][l], probs,
113 vp10_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
115 assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
116 c[t][i][j][k][1][l][EOB_TOKEN]);
120 // Values are now correlated to quantizer.
121 static int sad_per_bit16lut_8[QINDEX_RANGE];
122 static int sad_per_bit4lut_8[QINDEX_RANGE];
124 #if CONFIG_VP9_HIGHBITDEPTH
125 static int sad_per_bit16lut_10[QINDEX_RANGE];
126 static int sad_per_bit4lut_10[QINDEX_RANGE];
127 static int sad_per_bit16lut_12[QINDEX_RANGE];
128 static int sad_per_bit4lut_12[QINDEX_RANGE];
131 static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
132 vpx_bit_depth_t bit_depth) {
134 // Initialize the sad lut tables using a formulaic calculation for now.
135 // This is to make it easier to resolve the impact of experimental changes
136 // to the quantizer tables.
137 for (i = 0; i < range; i++) {
138 const double q = vp10_convert_qindex_to_q(i, bit_depth);
139 bit16lut[i] = (int)(0.0418 * q + 2.4107);
140 bit4lut[i] = (int)(0.063 * q + 2.742);
144 void vp10_init_me_luts(void) {
145 init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
147 #if CONFIG_VP9_HIGHBITDEPTH
148 init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
150 init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
155 static const int rd_boost_factor[16] = {
156 64, 32, 32, 32, 24, 16, 12, 12,
157 8, 8, 4, 4, 2, 2, 1, 0
159 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
160 128, 144, 128, 128, 144
163 int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
164 const int64_t q = vp10_dc_quant(qindex, 0, cpi->common.bit_depth);
165 #if CONFIG_VP9_HIGHBITDEPTH
167 switch (cpi->common.bit_depth) {
169 rdmult = 88 * q * q / 24;
172 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
175 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
178 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
182 int64_t rdmult = 88 * q * q / 24;
183 #endif // CONFIG_VP9_HIGHBITDEPTH
184 if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
185 const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
186 const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
187 const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
189 rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
190 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
197 static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
199 #if CONFIG_VP9_HIGHBITDEPTH
202 q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
205 q = vp10_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
208 q = vp10_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
211 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
216 q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
217 #endif // CONFIG_VP9_HIGHBITDEPTH
218 // TODO(debargha): Adjust the function below.
219 return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
222 void vp10_initialize_me_consts(VP10_COMP *cpi, MACROBLOCK *x, int qindex) {
223 #if CONFIG_VP9_HIGHBITDEPTH
224 switch (cpi->common.bit_depth) {
226 x->sadperbit16 = sad_per_bit16lut_8[qindex];
227 x->sadperbit4 = sad_per_bit4lut_8[qindex];
230 x->sadperbit16 = sad_per_bit16lut_10[qindex];
231 x->sadperbit4 = sad_per_bit4lut_10[qindex];
234 x->sadperbit16 = sad_per_bit16lut_12[qindex];
235 x->sadperbit4 = sad_per_bit4lut_12[qindex];
238 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
242 x->sadperbit16 = sad_per_bit16lut_8[qindex];
243 x->sadperbit4 = sad_per_bit4lut_8[qindex];
244 #endif // CONFIG_VP9_HIGHBITDEPTH
247 static void set_block_thresholds(const VP10_COMMON *cm, RD_OPT *rd) {
248 int i, bsize, segment_id;
250 for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
252 clamp(vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
253 cm->y_dc_delta_q, 0, MAXQ);
254 const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
256 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
257 // Threshold here seems unnecessarily harsh but fine given actual
258 // range of values used for cpi->sf.thresh_mult[].
259 const int t = q * rd_thresh_block_size_factor[bsize];
260 const int thresh_max = INT_MAX / t;
262 if (bsize >= BLOCK_8X8) {
263 for (i = 0; i < MAX_MODES; ++i)
264 rd->threshes[segment_id][bsize][i] =
265 rd->thresh_mult[i] < thresh_max
266 ? rd->thresh_mult[i] * t / 4
269 for (i = 0; i < MAX_REFS; ++i)
270 rd->threshes[segment_id][bsize][i] =
271 rd->thresh_mult_sub8x8[i] < thresh_max
272 ? rd->thresh_mult_sub8x8[i] * t / 4
279 void vp10_initialize_rd_consts(VP10_COMP *cpi) {
280 VP10_COMMON *const cm = &cpi->common;
281 MACROBLOCK *const x = &cpi->td.mb;
282 RD_OPT *const rd = &cpi->rd;
285 vpx_clear_system_state();
287 rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
288 rd->RDMULT = vp10_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
290 x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
291 x->errorperbit += (x->errorperbit == 0);
293 x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
294 cm->frame_type != KEY_FRAME) ? 0 : 1;
296 set_block_thresholds(cm, rd);
298 fill_token_costs(x->token_costs, cm->fc->coef_probs);
300 if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
301 cm->frame_type == KEY_FRAME) {
302 for (i = 0; i < PARTITION_CONTEXTS; ++i)
303 vp10_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
304 vp10_partition_tree);
307 fill_mode_costs(cpi);
309 if (!frame_is_intra_only(cm)) {
310 vp10_build_nmv_cost_table(x->nmvjointcost,
311 cm->allow_high_precision_mv ? x->nmvcost_hp
313 &cm->fc->nmvc, cm->allow_high_precision_mv);
315 for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
316 vp10_cost_tokens((int *)cpi->inter_mode_cost[i],
317 cm->fc->inter_mode_probs[i], vp10_inter_mode_tree);
// Look up and linearly interpolate the normalized rate (*r_q10) and
// distortion (*d_q10) for a Laplacian source at the given normalized
// squared stepsize xsq_q10 (Q10 fixed point).
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
    4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
    3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
    2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
    2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
    1159, 1086, 1021, 963, 911, 864, 821, 781,
    745, 680, 623, 574, 530, 490, 455, 424,
    395, 345, 304, 269, 239, 213, 190, 171,
    154, 126, 104, 87, 73, 61, 52, 44,
    38, 28, 21, 16, 12, 10, 8, 6,
    5, 3, 2, 1, 1, 1, 0, 0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0, 0, 1, 1, 1, 2, 2, 2,
    3, 3, 4, 5, 5, 6, 7, 7,
    8, 9, 11, 12, 13, 15, 16, 17,
    18, 21, 24, 26, 29, 31, 34, 36,
    39, 44, 49, 54, 59, 64, 69, 73,
    78, 88, 97, 106, 115, 124, 133, 142,
    151, 167, 184, 200, 215, 231, 245, 260,
    274, 301, 327, 351, 375, 397, 418, 439,
    458, 495, 528, 559, 587, 613, 637, 659,
    680, 717, 749, 777, 801, 823, 842, 859,
    874, 899, 919, 936, 949, 960, 969, 977,
    983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Sample points (in Q10) at which the two tables above are tabulated.
  static const int xsq_iq_q10[] = {
    0, 4, 8, 12, 16, 20, 24, 28,
    32, 40, 48, 56, 64, 72, 80, 88,
    96, 112, 128, 144, 160, 176, 192, 208,
    224, 256, 288, 320, 352, 384, 416, 448,
    480, 544, 608, 672, 736, 800, 864, 928,
    992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
    2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
    4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
    8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
    16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
    32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
    65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  // Locate the bracketing sample pair via the MSB of (xsq/4 + 8), then
  // interpolate between table entries xq and xq + 1.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
// Model rate and distortion for a Laplacian source with the given variance,
// 2^n_log2 samples and quantizer stepsize. Rate is returned in bits/4
// ("q2" units); distortion is absolute (Dn * variance).
void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                   unsigned int qstep, int *rate,
                                   int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    static const uint32_t MAX_XSQ_Q10 = 245727;
    // xsq = qstep^2 * N / var, computed in Q10 with rounding, clamped to
    // the last table entry of model_rd_norm().
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = ((r_q10 << n_log2) + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
420 void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
421 const struct macroblockd_plane *pd,
422 ENTROPY_CONTEXT t_above[16],
423 ENTROPY_CONTEXT t_left[16]) {
424 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
425 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
426 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
427 const ENTROPY_CONTEXT *const above = pd->above_context;
428 const ENTROPY_CONTEXT *const left = pd->left_context;
433 memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
434 memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
437 for (i = 0; i < num_4x4_w; i += 2)
438 t_above[i] = !!*(const uint16_t *)&above[i];
439 for (i = 0; i < num_4x4_h; i += 2)
440 t_left[i] = !!*(const uint16_t *)&left[i];
443 for (i = 0; i < num_4x4_w; i += 4)
444 t_above[i] = !!*(const uint32_t *)&above[i];
445 for (i = 0; i < num_4x4_h; i += 4)
446 t_left[i] = !!*(const uint32_t *)&left[i];
449 for (i = 0; i < num_4x4_w; i += 8)
450 t_above[i] = !!*(const uint64_t *)&above[i];
451 for (i = 0; i < num_4x4_h; i += 8)
452 t_left[i] = !!*(const uint64_t *)&left[i];
455 assert(0 && "Invalid transform size.");
460 void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
461 uint8_t *ref_y_buffer, int ref_y_stride,
462 int ref_frame, BLOCK_SIZE block_size) {
466 int best_sad = INT_MAX;
467 int this_sad = INT_MAX;
469 int near_same_nearest;
470 uint8_t *src_y_ptr = x->plane[0].src.buf;
472 const int num_mv_refs = MAX_MV_REF_CANDIDATES +
473 (cpi->sf.adaptive_motion_search &&
474 block_size < x->max_partition_size);
477 pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
478 pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
479 pred_mv[2] = x->pred_mv[ref_frame];
480 assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
483 x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
484 x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
485 // Get the sad for each candidate reference mv.
486 for (i = 0; i < num_mv_refs; ++i) {
487 const MV *this_mv = &pred_mv[i];
490 if (i == 1 && near_same_nearest)
492 fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
493 fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
494 max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
496 if (fp_row ==0 && fp_col == 0 && zero_seen)
498 zero_seen |= (fp_row ==0 && fp_col == 0);
500 ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col];
501 // Find sad for current vector.
502 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
503 ref_y_ptr, ref_y_stride);
504 // Note if it is the best so far.
505 if (this_sad < best_sad) {
511 // Note the index of the mv that worked best in the reference list.
512 x->mv_best_ref_index[ref_frame] = best_index;
513 x->max_mv_context[ref_frame] = max_mv;
514 x->pred_mv_sad[ref_frame] = best_sad;
517 void vp10_setup_pred_block(const MACROBLOCKD *xd,
518 struct buf_2d dst[MAX_MB_PLANE],
519 const YV12_BUFFER_CONFIG *src,
520 int mi_row, int mi_col,
521 const struct scale_factors *scale,
522 const struct scale_factors *scale_uv) {
525 dst[0].buf = src->y_buffer;
526 dst[0].stride = src->y_stride;
527 dst[1].buf = src->u_buffer;
528 dst[2].buf = src->v_buffer;
529 dst[1].stride = dst[2].stride = src->uv_stride;
531 for (i = 0; i < MAX_MB_PLANE; ++i) {
532 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
533 i ? scale_uv : scale,
534 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
538 int vp10_raster_block_offset(BLOCK_SIZE plane_bsize,
539 int raster_block, int stride) {
540 const int bw = b_width_log2_lookup[plane_bsize];
541 const int y = 4 * (raster_block >> bw);
542 const int x = 4 * (raster_block & ((1 << bw) - 1));
543 return y * stride + x;
546 int16_t* vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
547 int raster_block, int16_t *base) {
548 const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
549 return base + vp10_raster_block_offset(plane_bsize, raster_block, stride);
552 YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const VP10_COMP *cpi,
554 const VP10_COMMON *const cm = &cpi->common;
555 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
556 const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
558 (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
559 &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
562 int vp10_get_switchable_rate(const VP10_COMP *cpi,
563 const MACROBLOCKD *const xd) {
564 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
565 const int ctx = vp10_get_pred_context_switchable_interp(xd);
566 return SWITCHABLE_INTERP_RATE_FACTOR *
567 cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
570 void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) {
572 RD_OPT *const rd = &cpi->rd;
573 SPEED_FEATURES *const sf = &cpi->sf;
575 // Set baseline threshold values.
576 for (i = 0; i < MAX_MODES; ++i)
577 rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
579 if (sf->adaptive_rd_thresh) {
580 rd->thresh_mult[THR_NEARESTMV] = 300;
581 rd->thresh_mult[THR_NEARESTG] = 300;
582 rd->thresh_mult[THR_NEARESTA] = 300;
584 rd->thresh_mult[THR_NEARESTMV] = 0;
585 rd->thresh_mult[THR_NEARESTG] = 0;
586 rd->thresh_mult[THR_NEARESTA] = 0;
589 rd->thresh_mult[THR_DC] += 1000;
591 rd->thresh_mult[THR_NEWMV] += 1000;
592 rd->thresh_mult[THR_NEWA] += 1000;
593 rd->thresh_mult[THR_NEWG] += 1000;
595 rd->thresh_mult[THR_NEARMV] += 1000;
596 rd->thresh_mult[THR_NEARA] += 1000;
597 rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
598 rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
600 rd->thresh_mult[THR_TM] += 1000;
602 rd->thresh_mult[THR_COMP_NEARLA] += 1500;
603 rd->thresh_mult[THR_COMP_NEWLA] += 2000;
604 rd->thresh_mult[THR_NEARG] += 1000;
605 rd->thresh_mult[THR_COMP_NEARGA] += 1500;
606 rd->thresh_mult[THR_COMP_NEWGA] += 2000;
608 rd->thresh_mult[THR_ZEROMV] += 2000;
609 rd->thresh_mult[THR_ZEROG] += 2000;
610 rd->thresh_mult[THR_ZEROA] += 2000;
611 rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
612 rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
614 rd->thresh_mult[THR_H_PRED] += 2000;
615 rd->thresh_mult[THR_V_PRED] += 2000;
616 rd->thresh_mult[THR_D45_PRED ] += 2500;
617 rd->thresh_mult[THR_D135_PRED] += 2500;
618 rd->thresh_mult[THR_D117_PRED] += 2500;
619 rd->thresh_mult[THR_D153_PRED] += 2500;
620 rd->thresh_mult[THR_D207_PRED] += 2500;
621 rd->thresh_mult[THR_D63_PRED] += 2500;
624 void vp10_set_rd_speed_thresholds_sub8x8(VP10_COMP *cpi) {
625 static const int thresh_mult[2][MAX_REFS] =
626 {{2500, 2500, 2500, 4500, 4500, 2500},
627 {2000, 2000, 2000, 4000, 4000, 2000}};
628 RD_OPT *const rd = &cpi->rd;
629 const int idx = cpi->oxcf.mode == BEST;
630 memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
633 void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
634 int bsize, int best_mode_index) {
636 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
638 for (mode = 0; mode < top_mode; ++mode) {
639 const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
640 const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
642 for (bs = min_size; bs <= max_size; ++bs) {
643 int *const fact = &factor_buf[bs][mode];
644 if (mode == best_mode_index) {
645 *fact -= (*fact >> 4);
647 *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
654 int vp10_get_intra_cost_penalty(int qindex, int qdelta,
655 vpx_bit_depth_t bit_depth) {
656 const int q = vp10_dc_quant(qindex, qdelta, bit_depth);
657 #if CONFIG_VP9_HIGHBITDEPTH
664 return ROUND_POWER_OF_TWO(5 * q, 2);
666 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
671 #endif // CONFIG_VP9_HIGHBITDEPTH