granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_rd.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14
  15 #include "./vp9_rtcd.h"
  16
  17 #include "vpx_mem/vpx_mem.h"
  18 #include "vpx_ports/bitops.h"
  19 #include "vpx_ports/mem.h"
  20 #include "vpx_ports/system_state.h"
  21
  22 #include "vp9/common/vp9_common.h"
  23 #include "vp9/common/vp9_entropy.h"
  24 #include "vp9/common/vp9_entropymode.h"
  25 #include "vp9/common/vp9_mvref_common.h"
  26 #include "vp9/common/vp9_pred_common.h"
  27 #include "vp9/common/vp9_quant_common.h"
  28 #include "vp9/common/vp9_reconinter.h"
  29 #include "vp9/common/vp9_reconintra.h"
  30 #include "vp9/common/vp9_seg_common.h"
  31
  32 #include "vp9/encoder/vp9_cost.h"
  33 #include "vp9/encoder/vp9_encodemb.h"
  34 #include "vp9/encoder/vp9_encodemv.h"
  35 #include "vp9/encoder/vp9_encoder.h"
  36 #include "vp9/encoder/vp9_mcomp.h"
  37 #include "vp9/encoder/vp9_quantize.h"
  38 #include "vp9/encoder/vp9_ratectrl.h"
  39 #include "vp9/encoder/vp9_rd.h"
  40 #include "vp9/encoder/vp9_tokenize.h"
  41
  42 #define RD_THRESH_POW      1.25
  43 #define RD_MULT_EPB_RATIO  64
  44
  45 // Factor to weigh the rate for switchable interp filters.
  46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
  47
  48 void vp9_rd_cost_reset(RD_COST *rd_cost) {
  49   rd_cost->rate = INT_MAX;
  50   rd_cost->dist = INT64_MAX;
  51   rd_cost->rdcost = INT64_MAX;
  52 }
  53
  54 void vp9_rd_cost_init(RD_COST *rd_cost) {
  55   rd_cost->rate = 0;
  56   rd_cost->dist = 0;
  57   rd_cost->rdcost = 0;
  58 }
  59
  60 // The baseline rd thresholds for breaking out of the rd loop for
  61 // certain modes are assumed to be based on 8x8 blocks.
  62 // This table is used to correct for block size.
  63 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
  64 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  65   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
  66 };
  67
  68 static void fill_mode_costs(VP9_COMP *cpi) {
  69   const FRAME_CONTEXT *const fc = cpi->common.fc;
  70   int i, j;
  71
  72   for (i = 0; i < INTRA_MODES; ++i)
  73     for (j = 0; j < INTRA_MODES; ++j)
  74       vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
  75                       vp9_intra_mode_tree);
  76
  77   vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  78   vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
  79                   vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  80   vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
  81                   fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  82
  83   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
  84     vp9_cost_tokens(cpi->switchable_interp_costs[i],
  85                     fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  86 }
  87
  88 static void fill_token_costs(vp9_coeff_cost *c,
  89                              vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  90   int i, j, k, l;
  91   TX_SIZE t;
  92   for (t = TX_4X4; t <= TX_32X32; ++t)
  93     for (i = 0; i < PLANE_TYPES; ++i)
  94       for (j = 0; j < REF_TYPES; ++j)
  95         for (k = 0; k < COEF_BANDS; ++k)
  96           for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
  97             vpx_prob probs[ENTROPY_NODES];
  98             vp9_model_to_full_probs(p[t][i][j][k][l], probs);
  99             vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
 100                             vp9_coef_tree);
 101             vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
 102                                  vp9_coef_tree);
 103             assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
 104                    c[t][i][j][k][1][l][EOB_TOKEN]);
 105           }
 106 }
 107
 108 // Values are now correlated to quantizer.
 109 static int sad_per_bit16lut_8[QINDEX_RANGE];
 110 static int sad_per_bit4lut_8[QINDEX_RANGE];
 111
 112 #if CONFIG_VP9_HIGHBITDEPTH
 113 static int sad_per_bit16lut_10[QINDEX_RANGE];
 114 static int sad_per_bit4lut_10[QINDEX_RANGE];
 115 static int sad_per_bit16lut_12[QINDEX_RANGE];
 116 static int sad_per_bit4lut_12[QINDEX_RANGE];
 117 #endif
 118
 119 static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
 120                             vpx_bit_depth_t bit_depth) {
 121   int i;
 122   // Initialize the sad lut tables using a formulaic calculation for now.
 123   // This is to make it easier to resolve the impact of experimental changes
 124   // to the quantizer tables.
 125   for (i = 0; i < range; i++) {
 126     const double q = vp9_convert_qindex_to_q(i, bit_depth);
 127     bit16lut[i] = (int)(0.0418 * q + 2.4107);
 128     bit4lut[i] = (int)(0.063 * q + 2.742);
 129   }
 130 }
 131
 132 void vp9_init_me_luts(void) {
 133   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
 134                   VPX_BITS_8);
 135 #if CONFIG_VP9_HIGHBITDEPTH
 136   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
 137                   VPX_BITS_10);
 138   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
 139                   VPX_BITS_12);
 140 #endif
 141 }
 142
 143 static const int rd_boost_factor[16] = {
 144   64, 32, 32, 32, 24, 16, 12, 12,
 145   8, 8, 4, 4, 2, 2, 1, 0
 146 };
 147 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
 148   128, 144, 128, 128, 144
 149 };
 150
 151 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
 152   const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
 153 #if CONFIG_VP9_HIGHBITDEPTH
 154   int64_t rdmult = 0;
 155   switch (cpi->common.bit_depth) {
 156     case VPX_BITS_8:
 157       rdmult = 88 * q * q / 24;
 158       break;
 159     case VPX_BITS_10:
 160       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
 161       break;
 162     case VPX_BITS_12:
 163       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
 164       break;
 165     default:
 166       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 167       return -1;
 168   }
 169 #else
 170   int64_t rdmult = 88 * q * q / 24;
 171 #endif  // CONFIG_VP9_HIGHBITDEPTH
 172   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
 173     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 174     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
 175     const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
 176
 177     rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
 178     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
 179   }
 180   return (int)rdmult;
 181 }
 182
 183 static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
 184   double q;
 185 #if CONFIG_VP9_HIGHBITDEPTH
 186   switch (bit_depth) {
 187     case VPX_BITS_8:
 188       q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 189       break;
 190     case VPX_BITS_10:
 191       q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
 192       break;
 193     case VPX_BITS_12:
 194       q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
 195       break;
 196     default:
 197       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 198       return -1;
 199   }
 200 #else
 201   (void) bit_depth;
 202   q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 203 #endif  // CONFIG_VP9_HIGHBITDEPTH
 204   // TODO(debargha): Adjust the function below.
 205   return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
 206 }
 207
 208 void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
 209 #if CONFIG_VP9_HIGHBITDEPTH
 210   switch (cpi->common.bit_depth) {
 211     case VPX_BITS_8:
 212       x->sadperbit16 = sad_per_bit16lut_8[qindex];
 213       x->sadperbit4 = sad_per_bit4lut_8[qindex];
 214       break;
 215     case VPX_BITS_10:
 216       x->sadperbit16 = sad_per_bit16lut_10[qindex];
 217       x->sadperbit4 = sad_per_bit4lut_10[qindex];
 218       break;
 219     case VPX_BITS_12:
 220       x->sadperbit16 = sad_per_bit16lut_12[qindex];
 221       x->sadperbit4 = sad_per_bit4lut_12[qindex];
 222       break;
 223     default:
 224       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 225   }
 226 #else
 227   (void)cpi;
 228   x->sadperbit16 = sad_per_bit16lut_8[qindex];
 229   x->sadperbit4 = sad_per_bit4lut_8[qindex];
 230 #endif  // CONFIG_VP9_HIGHBITDEPTH
 231 }
 232
 233 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
 234   int i, bsize, segment_id;
 235
 236   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
 237     const int qindex =
 238         clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
 239               cm->y_dc_delta_q, 0, MAXQ);
 240     const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
 241
 242     for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
 243       // Threshold here seems unnecessarily harsh but fine given actual
 244       // range of values used for cpi->sf.thresh_mult[].
 245       const int t = q * rd_thresh_block_size_factor[bsize];
 246       const int thresh_max = INT_MAX / t;
 247
 248       if (bsize >= BLOCK_8X8) {
 249         for (i = 0; i < MAX_MODES; ++i)
 250           rd->threshes[segment_id][bsize][i] =
 251               rd->thresh_mult[i] < thresh_max
 252                   ? rd->thresh_mult[i] * t / 4
 253                   : INT_MAX;
 254       } else {
 255         for (i = 0; i < MAX_REFS; ++i)
 256           rd->threshes[segment_id][bsize][i] =
 257               rd->thresh_mult_sub8x8[i] < thresh_max
 258                   ? rd->thresh_mult_sub8x8[i] * t / 4
 259                   : INT_MAX;
 260       }
 261     }
 262   }
 263 }
 264
 265 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
 266   VP9_COMMON *const cm = &cpi->common;
 267   MACROBLOCK *const x = &cpi->td.mb;
 268   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
 269   RD_OPT *const rd = &cpi->rd;
 270   int i;
 271
 272   vpx_clear_system_state();
 273
 274   rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
 275   rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 276
 277   x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
 278   x->errorperbit += (x->errorperbit == 0);
 279
 280   x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
 281                        cm->frame_type != KEY_FRAME) ? 0 : 1;
 282
 283   set_block_thresholds(cm, rd);
 284   set_partition_probs(cm, xd);
 285
 286   if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
 287     fill_token_costs(x->token_costs, cm->fc->coef_probs);
 288
 289   if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
 290       cm->frame_type == KEY_FRAME) {
 291     for (i = 0; i < PARTITION_CONTEXTS; ++i)
 292       vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
 293                       vp9_partition_tree);
 294   }
 295
 296   if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
 297       cm->frame_type == KEY_FRAME) {
 298     fill_mode_costs(cpi);
 299
 300     if (!frame_is_intra_only(cm)) {
 301       vp9_build_nmv_cost_table(x->nmvjointcost,
 302                                cm->allow_high_precision_mv ? x->nmvcost_hp
 303                                                            : x->nmvcost,
 304                                &cm->fc->nmvc, cm->allow_high_precision_mv);
 305
 306       for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
 307         vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
 308                         cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
 309     }
 310   }
 311 }
 312
 313 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
 314   // NOTE: The tables below must be of the same size.
 315
 316   // The functions described below are sampled at the four most significant
 317   // bits of x^2 + 8 / 256.
 318
 319   // Normalized rate:
 320   // This table models the rate for a Laplacian source with given variance
 321   // when quantized with a uniform quantizer with given stepsize. The
 322   // closed form expression is:
 323   // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
 324   // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
 325   // and H(x) is the binary entropy function.
 326   static const int rate_tab_q10[] = {
 327     65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
 328      4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
 329      3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
 330      3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
 331      2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
 332      2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
 333      1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
 334      1159,  1086,  1021,   963,   911,   864,   821,   781,
 335       745,   680,   623,   574,   530,   490,   455,   424,
 336       395,   345,   304,   269,   239,   213,   190,   171,
 337       154,   126,   104,    87,    73,    61,    52,    44,
 338        38,    28,    21,    16,    12,    10,     8,     6,
 339         5,     3,     2,     1,     1,     1,     0,     0,
 340   };
 341   // Normalized distortion:
 342   // This table models the normalized distortion for a Laplacian source
 343   // with given variance when quantized with a uniform quantizer
 344   // with given stepsize. The closed form expression is:
 345   // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
 346   // where x = qpstep / sqrt(variance).
 347   // Note the actual distortion is Dn * variance.
 348   static const int dist_tab_q10[] = {
 349        0,     0,     1,     1,     1,     2,     2,     2,
 350        3,     3,     4,     5,     5,     6,     7,     7,
 351        8,     9,    11,    12,    13,    15,    16,    17,
 352       18,    21,    24,    26,    29,    31,    34,    36,
 353       39,    44,    49,    54,    59,    64,    69,    73,
 354       78,    88,    97,   106,   115,   124,   133,   142,
 355      151,   167,   184,   200,   215,   231,   245,   260,
 356      274,   301,   327,   351,   375,   397,   418,   439,
 357      458,   495,   528,   559,   587,   613,   637,   659,
 358      680,   717,   749,   777,   801,   823,   842,   859,
 359      874,   899,   919,   936,   949,   960,   969,   977,
 360      983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
 361     1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
 362   };
 363   static const int xsq_iq_q10[] = {
 364          0,      4,      8,     12,     16,     20,     24,     28,
 365         32,     40,     48,     56,     64,     72,     80,     88,
 366         96,    112,    128,    144,    160,    176,    192,    208,
 367        224,    256,    288,    320,    352,    384,    416,    448,
 368        480,    544,    608,    672,    736,    800,    864,    928,
 369        992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
 370       2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
 371       4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
 372       8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
 373      16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
 374      32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
 375      65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
 376     131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
 377   };
 378   const int tmp = (xsq_q10 >> 2) + 8;
 379   const int k = get_msb(tmp) - 3;
 380   const int xq = (k << 3) + ((tmp >> k) & 0x7);
 381   const int one_q10 = 1 << 10;
 382   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
 383   const int b_q10 = one_q10 - a_q10;
 384   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
 385   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
 386 }
 387
 388 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
 389                                   unsigned int qstep, int *rate,
 390                                   int64_t *dist) {
 391   // This function models the rate and distortion for a Laplacian
 392   // source with given variance when quantized with a uniform quantizer
 393   // with given stepsize. The closed form expressions are in:
 394   // Hang and Chen, "Source Model for transform video coder and its
 395   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
 396   // Sys. for Video Tech., April 1997.
 397   if (var == 0) {
 398     *rate = 0;
 399     *dist = 0;
 400   } else {
 401     int d_q10, r_q10;
 402     static const uint32_t MAX_XSQ_Q10 = 245727;
 403     const uint64_t xsq_q10_64 =
 404         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
 405     const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10);
 406     model_rd_norm(xsq_q10, &r_q10, &d_q10);
 407     *rate = ((r_q10 << n_log2) + 2) >> 2;
 408     *dist = (var * (int64_t)d_q10 + 512) >> 10;
 409   }
 410 }
 411
 412 void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
 413                               const struct macroblockd_plane *pd,
 414                               ENTROPY_CONTEXT t_above[16],
 415                               ENTROPY_CONTEXT t_left[16]) {
 416   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
 417   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
 418   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
 419   const ENTROPY_CONTEXT *const above = pd->above_context;
 420   const ENTROPY_CONTEXT *const left = pd->left_context;
 421
 422   int i;
 423   switch (tx_size) {
 424     case TX_4X4:
 425       memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
 426       memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
 427       break;
 428     case TX_8X8:
 429       for (i = 0; i < num_4x4_w; i += 2)
 430         t_above[i] = !!*(const uint16_t *)&above[i];
 431       for (i = 0; i < num_4x4_h; i += 2)
 432         t_left[i] = !!*(const uint16_t *)&left[i];
 433       break;
 434     case TX_16X16:
 435       for (i = 0; i < num_4x4_w; i += 4)
 436         t_above[i] = !!*(const uint32_t *)&above[i];
 437       for (i = 0; i < num_4x4_h; i += 4)
 438         t_left[i] = !!*(const uint32_t *)&left[i];
 439       break;
 440     case TX_32X32:
 441       for (i = 0; i < num_4x4_w; i += 8)
 442         t_above[i] = !!*(const uint64_t *)&above[i];
 443       for (i = 0; i < num_4x4_h; i += 8)
 444         t_left[i] = !!*(const uint64_t *)&left[i];
 445       break;
 446     default:
 447       assert(0 && "Invalid transform size.");
 448       break;
 449   }
 450 }
 451
 452 void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
 453                  uint8_t *ref_y_buffer, int ref_y_stride,
 454                  int ref_frame, BLOCK_SIZE block_size) {
 455   int i;
 456   int zero_seen = 0;
 457   int best_index = 0;
 458   int best_sad = INT_MAX;
 459   int this_sad = INT_MAX;
 460   int max_mv = 0;
 461   int near_same_nearest;
 462   uint8_t *src_y_ptr = x->plane[0].src.buf;
 463   uint8_t *ref_y_ptr;
 464   const int num_mv_refs = MAX_MV_REF_CANDIDATES +
 465                     (cpi->sf.adaptive_motion_search &&
 466                      block_size < x->max_partition_size);
 467
 468   MV pred_mv[3];
 469   pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
 470   pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
 471   pred_mv[2] = x->pred_mv[ref_frame];
 472   assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
 473
 474   near_same_nearest =
 475       x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
 476           x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
 477   // Get the sad for each candidate reference mv.
 478   for (i = 0; i < num_mv_refs; ++i) {
 479     const MV *this_mv = &pred_mv[i];
 480     int fp_row, fp_col;
 481
 482     if (i == 1 && near_same_nearest)
 483       continue;
 484     fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
 485     fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
 486     max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
 487
 488     if (fp_row ==0 && fp_col == 0 && zero_seen)
 489       continue;
 490     zero_seen |= (fp_row ==0 && fp_col == 0);
 491
 492     ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col];
 493     // Find sad for current vector.
 494     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
 495                                            ref_y_ptr, ref_y_stride);
 496     // Note if it is the best so far.
 497     if (this_sad < best_sad) {
 498       best_sad = this_sad;
 499       best_index = i;
 500     }
 501   }
 502
 503   // Note the index of the mv that worked best in the reference list.
 504   x->mv_best_ref_index[ref_frame] = best_index;
 505   x->max_mv_context[ref_frame] = max_mv;
 506   x->pred_mv_sad[ref_frame] = best_sad;
 507 }
 508
 509 void vp9_setup_pred_block(const MACROBLOCKD *xd,
 510                           struct buf_2d dst[MAX_MB_PLANE],
 511                           const YV12_BUFFER_CONFIG *src,
 512                           int mi_row, int mi_col,
 513                           const struct scale_factors *scale,
 514                           const struct scale_factors *scale_uv) {
 515   int i;
 516
 517   dst[0].buf = src->y_buffer;
 518   dst[0].stride = src->y_stride;
 519   dst[1].buf = src->u_buffer;
 520   dst[2].buf = src->v_buffer;
 521   dst[1].stride = dst[2].stride = src->uv_stride;
 522
 523   for (i = 0; i < MAX_MB_PLANE; ++i) {
 524     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
 525                      i ? scale_uv : scale,
 526                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
 527   }
 528 }
 529
 530 int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
 531                             int raster_block, int stride) {
 532   const int bw = b_width_log2_lookup[plane_bsize];
 533   const int y = 4 * (raster_block >> bw);
 534   const int x = 4 * (raster_block & ((1 << bw) - 1));
 535   return y * stride + x;
 536 }
 537
 538 int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
 539                                        int raster_block, int16_t *base) {
 540   const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 541   return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
 542 }
 543
 544 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
 545                                              int ref_frame) {
 546   const VP9_COMMON *const cm = &cpi->common;
 547   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
 548   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
 549   return
 550       (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
 551           &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
 552 }
 553
 554 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
 555   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 556   const int ctx = vp9_get_pred_context_switchable_interp(xd);
 557   return SWITCHABLE_INTERP_RATE_FACTOR *
 558              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
 559 }
 560
 561 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
 562   int i;
 563   RD_OPT *const rd = &cpi->rd;
 564   SPEED_FEATURES *const sf = &cpi->sf;
 565
 566   // Set baseline threshold values.
 567   for (i = 0; i < MAX_MODES; ++i)
 568     rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
 569
 570   if (sf->adaptive_rd_thresh) {
 571     rd->thresh_mult[THR_NEARESTMV] = 300;
 572     rd->thresh_mult[THR_NEARESTG] = 300;
 573     rd->thresh_mult[THR_NEARESTA] = 300;
 574   } else {
 575     rd->thresh_mult[THR_NEARESTMV] = 0;
 576     rd->thresh_mult[THR_NEARESTG] = 0;
 577     rd->thresh_mult[THR_NEARESTA] = 0;
 578   }
 579
 580   rd->thresh_mult[THR_DC] += 1000;
 581
 582   rd->thresh_mult[THR_NEWMV] += 1000;
 583   rd->thresh_mult[THR_NEWA] += 1000;
 584   rd->thresh_mult[THR_NEWG] += 1000;
 585
 586   rd->thresh_mult[THR_NEARMV] += 1000;
 587   rd->thresh_mult[THR_NEARA] += 1000;
 588   rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
 589   rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
 590
 591   rd->thresh_mult[THR_TM] += 1000;
 592
 593   rd->thresh_mult[THR_COMP_NEARLA] += 1500;
 594   rd->thresh_mult[THR_COMP_NEWLA] += 2000;
 595   rd->thresh_mult[THR_NEARG] += 1000;
 596   rd->thresh_mult[THR_COMP_NEARGA] += 1500;
 597   rd->thresh_mult[THR_COMP_NEWGA] += 2000;
 598
 599   rd->thresh_mult[THR_ZEROMV] += 2000;
 600   rd->thresh_mult[THR_ZEROG] += 2000;
 601   rd->thresh_mult[THR_ZEROA] += 2000;
 602   rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
 603   rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
 604
 605   rd->thresh_mult[THR_H_PRED] += 2000;
 606   rd->thresh_mult[THR_V_PRED] += 2000;
 607   rd->thresh_mult[THR_D45_PRED ] += 2500;
 608   rd->thresh_mult[THR_D135_PRED] += 2500;
 609   rd->thresh_mult[THR_D117_PRED] += 2500;
 610   rd->thresh_mult[THR_D153_PRED] += 2500;
 611   rd->thresh_mult[THR_D207_PRED] += 2500;
 612   rd->thresh_mult[THR_D63_PRED] += 2500;
 613 }
 614
 615 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
 616   static const int thresh_mult[2][MAX_REFS] =
 617       {{2500, 2500, 2500, 4500, 4500, 2500},
 618        {2000, 2000, 2000, 4000, 4000, 2000}};
 619   RD_OPT *const rd = &cpi->rd;
 620   const int idx = cpi->oxcf.mode == BEST;
 621   memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
 622 }
 623
 624 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
 625                                int bsize, int best_mode_index) {
 626   if (rd_thresh > 0) {
 627     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
 628     int mode;
 629     for (mode = 0; mode < top_mode; ++mode) {
 630       const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
 631       const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
 632       BLOCK_SIZE bs;
 633       for (bs = min_size; bs <= max_size; ++bs) {
 634         int *const fact = &factor_buf[bs][mode];
 635         if (mode == best_mode_index) {
 636           *fact -= (*fact >> 4);
 637         } else {
 638           *fact = MIN(*fact + RD_THRESH_INC,
 639                       rd_thresh * RD_THRESH_MAX_FACT);
 640         }
 641       }
 642     }
 643   }
 644 }
 645
 646 int vp9_get_intra_cost_penalty(int qindex, int qdelta,
 647                                vpx_bit_depth_t bit_depth) {
 648   const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
 649 #if CONFIG_VP9_HIGHBITDEPTH
 650   switch (bit_depth) {
 651     case VPX_BITS_8:
 652       return 20 * q;
 653     case VPX_BITS_10:
 654       return 5 * q;
 655     case VPX_BITS_12:
 656       return ROUND_POWER_OF_TWO(5 * q, 2);
 657     default:
 658       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 659       return -1;
 660   }
 661 #else
 662   return 20 * q;
 663 #endif  // CONFIG_VP9_HIGHBITDEPTH
 664 }
 665