granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_rd.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14
  15 #include "./vp9_rtcd.h"
  16
  17 #include "vpx_mem/vpx_mem.h"
  18
  19 #include "vp9/common/vp9_common.h"
  20 #include "vp9/common/vp9_entropy.h"
  21 #include "vp9/common/vp9_entropymode.h"
  22 #include "vp9/common/vp9_mvref_common.h"
  23 #include "vp9/common/vp9_pred_common.h"
  24 #include "vp9/common/vp9_quant_common.h"
  25 #include "vp9/common/vp9_reconinter.h"
  26 #include "vp9/common/vp9_reconintra.h"
  27 #include "vp9/common/vp9_seg_common.h"
  28 #include "vp9/common/vp9_systemdependent.h"
  29
  30 #include "vp9/encoder/vp9_cost.h"
  31 #include "vp9/encoder/vp9_encodemb.h"
  32 #include "vp9/encoder/vp9_encodemv.h"
  33 #include "vp9/encoder/vp9_encoder.h"
  34 #include "vp9/encoder/vp9_mcomp.h"
  35 #include "vp9/encoder/vp9_quantize.h"
  36 #include "vp9/encoder/vp9_ratectrl.h"
  37 #include "vp9/encoder/vp9_rd.h"
  38 #include "vp9/encoder/vp9_tokenize.h"
  39 #include "vp9/encoder/vp9_variance.h"
  40
  41 #define RD_THRESH_POW      1.25
  42 #define RD_MULT_EPB_RATIO  64
  43
  44 // Factor to weigh the rate for switchable interp filters.
  45 #define SWITCHABLE_INTERP_RATE_FACTOR 1
  46
  47 // The baseline rd thresholds for breaking out of the rd loop for
  48 // certain modes are assumed to be based on 8x8 blocks.
  49 // This table is used to correct for block size.
  50 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
  51 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  52   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
  53 };
  54
  55 static void fill_mode_costs(VP9_COMP *cpi) {
  56   const FRAME_CONTEXT *const fc = &cpi->common.fc;
  57   int i, j;
  58
  59   for (i = 0; i < INTRA_MODES; ++i)
  60     for (j = 0; j < INTRA_MODES; ++j)
  61       vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
  62                       vp9_intra_mode_tree);
  63
  64   vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  65   vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
  66                   vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  67   vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
  68                   fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  69
  70   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
  71     vp9_cost_tokens(cpi->switchable_interp_costs[i],
  72                     fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  73 }
  74
  75 static void fill_token_costs(vp9_coeff_cost *c,
  76                              vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  77   int i, j, k, l;
  78   TX_SIZE t;
  79   for (t = TX_4X4; t <= TX_32X32; ++t)
  80     for (i = 0; i < PLANE_TYPES; ++i)
  81       for (j = 0; j < REF_TYPES; ++j)
  82         for (k = 0; k < COEF_BANDS; ++k)
  83           for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
  84             vp9_prob probs[ENTROPY_NODES];
  85             vp9_model_to_full_probs(p[t][i][j][k][l], probs);
  86             vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
  87                             vp9_coef_tree);
  88             vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
  89                                  vp9_coef_tree);
  90             assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
  91                    c[t][i][j][k][1][l][EOB_TOKEN]);
  92           }
  93 }
  94
  95 // Values are now correlated to quantizer.
  96 static int sad_per_bit16lut_8[QINDEX_RANGE];
  97 static int sad_per_bit4lut_8[QINDEX_RANGE];
  98
  99 #if CONFIG_VP9_HIGHBITDEPTH
 100 static int sad_per_bit16lut_10[QINDEX_RANGE];
 101 static int sad_per_bit4lut_10[QINDEX_RANGE];
 102 static int sad_per_bit16lut_12[QINDEX_RANGE];
 103 static int sad_per_bit4lut_12[QINDEX_RANGE];
 104 #endif
 105
 106 static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
 107                             vpx_bit_depth_t bit_depth) {
 108   int i;
 109   // Initialize the sad lut tables using a formulaic calculation for now.
 110   // This is to make it easier to resolve the impact of experimental changes
 111   // to the quantizer tables.
 112   for (i = 0; i < range; i++) {
 113     const double q = vp9_convert_qindex_to_q(i, bit_depth);
 114     bit16lut[i] = (int)(0.0418 * q + 2.4107);
 115     bit4lut[i] = (int)(0.063 * q + 2.742);
 116   }
 117 }
 118
 119 void vp9_init_me_luts() {
 120   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
 121                   VPX_BITS_8);
 122 #if CONFIG_VP9_HIGHBITDEPTH
 123   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
 124                   VPX_BITS_10);
 125   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
 126                   VPX_BITS_12);
 127 #endif
 128 }
 129
 130 static const int rd_boost_factor[16] = {
 131   64, 32, 32, 32, 24, 16, 12, 12,
 132   8, 8, 4, 4, 2, 2, 1, 0
 133 };
 134 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
 135   128, 144, 128, 128, 144
 136 };
 137
 138 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
 139   const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
 140 #if CONFIG_VP9_HIGHBITDEPTH
 141   int rdmult = 0;
 142   switch (cpi->common.bit_depth) {
 143     case VPX_BITS_8:
 144       rdmult = 88 * q * q / 24;
 145       break;
 146     case VPX_BITS_10:
 147       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
 148       break;
 149     case VPX_BITS_12:
 150       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
 151       break;
 152     default:
 153       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 154       return -1;
 155   }
 156 #else
 157   int rdmult = 88 * q * q / 24;
 158 #endif  // CONFIG_VP9_HIGHBITDEPTH
 159   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
 160     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 161     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
 162     const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
 163
 164     rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
 165     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
 166   }
 167   return rdmult;
 168 }
 169
 170 static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
 171   double q;
 172 #if CONFIG_VP9_HIGHBITDEPTH
 173   switch (bit_depth) {
 174     case VPX_BITS_8:
 175       q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 176       break;
 177     case VPX_BITS_10:
 178       q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
 179       break;
 180     case VPX_BITS_12:
 181       q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
 182       break;
 183     default:
 184       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 185       return -1;
 186   }
 187 #else
 188   (void) bit_depth;
 189   q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 190 #endif  // CONFIG_VP9_HIGHBITDEPTH
 191   // TODO(debargha): Adjust the function below.
 192   return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
 193 }
 194
 195 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
 196 #if CONFIG_VP9_HIGHBITDEPTH
 197   switch (cpi->common.bit_depth) {
 198     case VPX_BITS_8:
 199       cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex];
 200       cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex];
 201       break;
 202     case VPX_BITS_10:
 203       cpi->mb.sadperbit16 = sad_per_bit16lut_10[qindex];
 204       cpi->mb.sadperbit4 = sad_per_bit4lut_10[qindex];
 205       break;
 206     case VPX_BITS_12:
 207       cpi->mb.sadperbit16 = sad_per_bit16lut_12[qindex];
 208       cpi->mb.sadperbit4 = sad_per_bit4lut_12[qindex];
 209       break;
 210     default:
 211       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 212   }
 213 #else
 214   cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex];
 215   cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex];
 216 #endif  // CONFIG_VP9_HIGHBITDEPTH
 217 }
 218
 219 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
 220   int i, bsize, segment_id;
 221
 222   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
 223     const int qindex =
 224         clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
 225               cm->y_dc_delta_q, 0, MAXQ);
 226     const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
 227
 228     for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
 229       // Threshold here seems unnecessarily harsh but fine given actual
 230       // range of values used for cpi->sf.thresh_mult[].
 231       const int t = q * rd_thresh_block_size_factor[bsize];
 232       const int thresh_max = INT_MAX / t;
 233
 234       if (bsize >= BLOCK_8X8) {
 235         for (i = 0; i < MAX_MODES; ++i)
 236           rd->threshes[segment_id][bsize][i] =
 237               rd->thresh_mult[i] < thresh_max
 238                   ? rd->thresh_mult[i] * t / 4
 239                   : INT_MAX;
 240       } else {
 241         for (i = 0; i < MAX_REFS; ++i)
 242           rd->threshes[segment_id][bsize][i] =
 243               rd->thresh_mult_sub8x8[i] < thresh_max
 244                   ? rd->thresh_mult_sub8x8[i] * t / 4
 245                   : INT_MAX;
 246       }
 247     }
 248   }
 249 }
 250
 251 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
 252   VP9_COMMON *const cm = &cpi->common;
 253   MACROBLOCK *const x = &cpi->mb;
 254   RD_OPT *const rd = &cpi->rd;
 255   int i;
 256
 257   vp9_clear_system_state();
 258
 259   rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
 260   rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 261
 262   x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
 263   x->errorperbit += (x->errorperbit == 0);
 264
 265   x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
 266                        cm->frame_type != KEY_FRAME) ? 0 : 1;
 267
 268   set_block_thresholds(cm, rd);
 269
 270   if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
 271     fill_token_costs(x->token_costs, cm->fc.coef_probs);
 272
 273     for (i = 0; i < PARTITION_CONTEXTS; ++i)
 274       vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
 275                       vp9_partition_tree);
 276   }
 277
 278   if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
 279       cm->frame_type == KEY_FRAME) {
 280     fill_mode_costs(cpi);
 281
 282     if (!frame_is_intra_only(cm)) {
 283       vp9_build_nmv_cost_table(x->nmvjointcost,
 284                                cm->allow_high_precision_mv ? x->nmvcost_hp
 285                                                            : x->nmvcost,
 286                                &cm->fc.nmvc, cm->allow_high_precision_mv);
 287
 288       for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
 289         vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
 290                         cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
 291     }
 292   }
 293 }
 294
 295 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
 296   // NOTE: The tables below must be of the same size.
 297
 298   // The functions described below are sampled at the four most significant
 299   // bits of x^2 + 8 / 256.
 300
 301   // Normalized rate:
 302   // This table models the rate for a Laplacian source with given variance
 303   // when quantized with a uniform quantizer with given stepsize. The
 304   // closed form expression is:
 305   // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
 306   // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
 307   // and H(x) is the binary entropy function.
 308   static const int rate_tab_q10[] = {
 309     65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
 310      4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
 311      3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
 312      3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
 313      2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
 314      2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
 315      1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
 316      1159,  1086,  1021,   963,   911,   864,   821,   781,
 317       745,   680,   623,   574,   530,   490,   455,   424,
 318       395,   345,   304,   269,   239,   213,   190,   171,
 319       154,   126,   104,    87,    73,    61,    52,    44,
 320        38,    28,    21,    16,    12,    10,     8,     6,
 321         5,     3,     2,     1,     1,     1,     0,     0,
 322   };
 323   // Normalized distortion:
 324   // This table models the normalized distortion for a Laplacian source
 325   // with given variance when quantized with a uniform quantizer
 326   // with given stepsize. The closed form expression is:
 327   // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
 328   // where x = qpstep / sqrt(variance).
 329   // Note the actual distortion is Dn * variance.
 330   static const int dist_tab_q10[] = {
 331        0,     0,     1,     1,     1,     2,     2,     2,
 332        3,     3,     4,     5,     5,     6,     7,     7,
 333        8,     9,    11,    12,    13,    15,    16,    17,
 334       18,    21,    24,    26,    29,    31,    34,    36,
 335       39,    44,    49,    54,    59,    64,    69,    73,
 336       78,    88,    97,   106,   115,   124,   133,   142,
 337      151,   167,   184,   200,   215,   231,   245,   260,
 338      274,   301,   327,   351,   375,   397,   418,   439,
 339      458,   495,   528,   559,   587,   613,   637,   659,
 340      680,   717,   749,   777,   801,   823,   842,   859,
 341      874,   899,   919,   936,   949,   960,   969,   977,
 342      983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
 343     1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
 344   };
 345   static const int xsq_iq_q10[] = {
 346          0,      4,      8,     12,     16,     20,     24,     28,
 347         32,     40,     48,     56,     64,     72,     80,     88,
 348         96,    112,    128,    144,    160,    176,    192,    208,
 349        224,    256,    288,    320,    352,    384,    416,    448,
 350        480,    544,    608,    672,    736,    800,    864,    928,
 351        992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
 352       2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
 353       4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
 354       8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
 355      16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
 356      32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
 357      65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
 358     131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
 359   };
 360   const int tmp = (xsq_q10 >> 2) + 8;
 361   const int k = get_msb(tmp) - 3;
 362   const int xq = (k << 3) + ((tmp >> k) & 0x7);
 363   const int one_q10 = 1 << 10;
 364   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
 365   const int b_q10 = one_q10 - a_q10;
 366   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
 367   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
 368 }
 369
 370 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
 371                                   unsigned int qstep, int *rate,
 372                                   int64_t *dist) {
 373   // This function models the rate and distortion for a Laplacian
 374   // source with given variance when quantized with a uniform quantizer
 375   // with given stepsize. The closed form expressions are in:
 376   // Hang and Chen, "Source Model for transform video coder and its
 377   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
 378   // Sys. for Video Tech., April 1997.
 379   if (var == 0) {
 380     *rate = 0;
 381     *dist = 0;
 382   } else {
 383     int d_q10, r_q10;
 384     static const uint32_t MAX_XSQ_Q10 = 245727;
 385     const uint64_t xsq_q10_64 =
 386         ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
 387     const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10);
 388     model_rd_norm(xsq_q10, &r_q10, &d_q10);
 389     *rate = (n * r_q10 + 2) >> 2;
 390     *dist = (var * (int64_t)d_q10 + 512) >> 10;
 391   }
 392 }
 393
 394 void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
 395                               const struct macroblockd_plane *pd,
 396                               ENTROPY_CONTEXT t_above[16],
 397                               ENTROPY_CONTEXT t_left[16]) {
 398   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
 399   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
 400   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
 401   const ENTROPY_CONTEXT *const above = pd->above_context;
 402   const ENTROPY_CONTEXT *const left = pd->left_context;
 403
 404   int i;
 405   switch (tx_size) {
 406     case TX_4X4:
 407       vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
 408       vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
 409       break;
 410     case TX_8X8:
 411       for (i = 0; i < num_4x4_w; i += 2)
 412         t_above[i] = !!*(const uint16_t *)&above[i];
 413       for (i = 0; i < num_4x4_h; i += 2)
 414         t_left[i] = !!*(const uint16_t *)&left[i];
 415       break;
 416     case TX_16X16:
 417       for (i = 0; i < num_4x4_w; i += 4)
 418         t_above[i] = !!*(const uint32_t *)&above[i];
 419       for (i = 0; i < num_4x4_h; i += 4)
 420         t_left[i] = !!*(const uint32_t *)&left[i];
 421       break;
 422     case TX_32X32:
 423       for (i = 0; i < num_4x4_w; i += 8)
 424         t_above[i] = !!*(const uint64_t *)&above[i];
 425       for (i = 0; i < num_4x4_h; i += 8)
 426         t_left[i] = !!*(const uint64_t *)&left[i];
 427       break;
 428     default:
 429       assert(0 && "Invalid transform size.");
 430       break;
 431   }
 432 }
 433
 434 void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
 435                  uint8_t *ref_y_buffer, int ref_y_stride,
 436                  int ref_frame, BLOCK_SIZE block_size) {
 437   MACROBLOCKD *xd = &x->e_mbd;
 438   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
 439   int i;
 440   int zero_seen = 0;
 441   int best_index = 0;
 442   int best_sad = INT_MAX;
 443   int this_sad = INT_MAX;
 444   int max_mv = 0;
 445   uint8_t *src_y_ptr = x->plane[0].src.buf;
 446   uint8_t *ref_y_ptr;
 447   const int num_mv_refs = MAX_MV_REF_CANDIDATES +
 448                     (cpi->sf.adaptive_motion_search &&
 449                      block_size < cpi->sf.max_partition_size);
 450
 451   MV pred_mv[3];
 452   pred_mv[0] = mbmi->ref_mvs[ref_frame][0].as_mv;
 453   pred_mv[1] = mbmi->ref_mvs[ref_frame][1].as_mv;
 454   pred_mv[2] = x->pred_mv[ref_frame];
 455
 456   // Get the sad for each candidate reference mv.
 457   for (i = 0; i < num_mv_refs; ++i) {
 458     const MV *this_mv = &pred_mv[i];
 459
 460     max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
 461     if (is_zero_mv(this_mv) && zero_seen)
 462       continue;
 463
 464     zero_seen |= is_zero_mv(this_mv);
 465
 466     ref_y_ptr =
 467         &ref_y_buffer[ref_y_stride * (this_mv->row >> 3) + (this_mv->col >> 3)];
 468
 469     // Find sad for current vector.
 470     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
 471                                            ref_y_ptr, ref_y_stride);
 472
 473     // Note if it is the best so far.
 474     if (this_sad < best_sad) {
 475       best_sad = this_sad;
 476       best_index = i;
 477     }
 478   }
 479
 480   // Note the index of the mv that worked best in the reference list.
 481   x->mv_best_ref_index[ref_frame] = best_index;
 482   x->max_mv_context[ref_frame] = max_mv;
 483   x->pred_mv_sad[ref_frame] = best_sad;
 484 }
 485
 486 void vp9_setup_pred_block(const MACROBLOCKD *xd,
 487                           struct buf_2d dst[MAX_MB_PLANE],
 488                           const YV12_BUFFER_CONFIG *src,
 489                           int mi_row, int mi_col,
 490                           const struct scale_factors *scale,
 491                           const struct scale_factors *scale_uv) {
 492   int i;
 493
 494   dst[0].buf = src->y_buffer;
 495   dst[0].stride = src->y_stride;
 496   dst[1].buf = src->u_buffer;
 497   dst[2].buf = src->v_buffer;
 498   dst[1].stride = dst[2].stride = src->uv_stride;
 499
 500   for (i = 0; i < MAX_MB_PLANE; ++i) {
 501     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
 502                      i ? scale_uv : scale,
 503                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
 504   }
 505 }
 506
 507 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
 508                                                    int ref_frame) {
 509   const VP9_COMMON *const cm = &cpi->common;
 510   const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
 511   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
 512   return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
 513 }
 514
 515 int vp9_get_switchable_rate(const VP9_COMP *cpi) {
 516   const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 517   const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
 518   const int ctx = vp9_get_pred_context_switchable_interp(xd);
 519   return SWITCHABLE_INTERP_RATE_FACTOR *
 520              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
 521 }
 522
 523 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
 524   int i;
 525   RD_OPT *const rd = &cpi->rd;
 526   SPEED_FEATURES *const sf = &cpi->sf;
 527
 528   // Set baseline threshold values.
 529   for (i = 0; i < MAX_MODES; ++i)
 530     rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
 531
 532   if (sf->adaptive_rd_thresh) {
 533     rd->thresh_mult[THR_NEARESTMV] = 300;
 534     rd->thresh_mult[THR_NEARESTG] = 300;
 535     rd->thresh_mult[THR_NEARESTA] = 300;
 536   } else {
 537     rd->thresh_mult[THR_NEARESTMV] = 0;
 538     rd->thresh_mult[THR_NEARESTG] = 0;
 539     rd->thresh_mult[THR_NEARESTA] = 0;
 540   }
 541
 542   rd->thresh_mult[THR_DC] += 1000;
 543
 544   rd->thresh_mult[THR_NEWMV] += 1000;
 545   rd->thresh_mult[THR_NEWA] += 1000;
 546   rd->thresh_mult[THR_NEWG] += 1000;
 547
 548   // Adjust threshold only in real time mode, which only uses last
 549   // reference frame.
 550   rd->thresh_mult[THR_NEWMV] += sf->elevate_newmv_thresh;
 551
 552   rd->thresh_mult[THR_NEARMV] += 1000;
 553   rd->thresh_mult[THR_NEARA] += 1000;
 554   rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
 555   rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
 556
 557   rd->thresh_mult[THR_TM] += 1000;
 558
 559   rd->thresh_mult[THR_COMP_NEARLA] += 1500;
 560   rd->thresh_mult[THR_COMP_NEWLA] += 2000;
 561   rd->thresh_mult[THR_NEARG] += 1000;
 562   rd->thresh_mult[THR_COMP_NEARGA] += 1500;
 563   rd->thresh_mult[THR_COMP_NEWGA] += 2000;
 564
 565   rd->thresh_mult[THR_ZEROMV] += 2000;
 566   rd->thresh_mult[THR_ZEROG] += 2000;
 567   rd->thresh_mult[THR_ZEROA] += 2000;
 568   rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
 569   rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
 570
 571   rd->thresh_mult[THR_H_PRED] += 2000;
 572   rd->thresh_mult[THR_V_PRED] += 2000;
 573   rd->thresh_mult[THR_D45_PRED ] += 2500;
 574   rd->thresh_mult[THR_D135_PRED] += 2500;
 575   rd->thresh_mult[THR_D117_PRED] += 2500;
 576   rd->thresh_mult[THR_D153_PRED] += 2500;
 577   rd->thresh_mult[THR_D207_PRED] += 2500;
 578   rd->thresh_mult[THR_D63_PRED] += 2500;
 579 }
 580
 581 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
 582   const SPEED_FEATURES *const sf = &cpi->sf;
 583   RD_OPT *const rd = &cpi->rd;
 584   int i;
 585
 586   for (i = 0; i < MAX_REFS; ++i)
 587     rd->thresh_mult_sub8x8[i] = cpi->oxcf.mode == BEST ? -500 : 0;
 588
 589   rd->thresh_mult_sub8x8[THR_LAST] += 2500;
 590   rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
 591   rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
 592   rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
 593   rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
 594   rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
 595
 596   // Check for masked out split cases.
 597   for (i = 0; i < MAX_REFS; ++i)
 598     if (sf->disable_split_mask & (1 << i))
 599       rd->thresh_mult_sub8x8[i] = INT_MAX;
 600 }
 601
 602 int vp9_get_intra_cost_penalty(int qindex, int qdelta,
 603                                vpx_bit_depth_t bit_depth) {
 604   const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
 605 #if CONFIG_VP9_HIGHBITDEPTH
 606   switch (bit_depth) {
 607     case VPX_BITS_8:
 608       return 20 * q;
 609     case VPX_BITS_10:
 610       return 5 * q;
 611     case VPX_BITS_12:
 612       return ROUND_POWER_OF_TWO(5 * q, 2);
 613     default:
 614       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 615       return -1;
 616   }
 617 #else
 618   return 20 * q;
 619 #endif  // CONFIG_VP9_HIGHBITDEPTH
 620 }
 621