granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_rd.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14
  15 #include "./vp9_rtcd.h"
  16
  17 #include "vpx_mem/vpx_mem.h"
  18 #include "vpx_ports/mem.h"
  19
  20 #include "vp9/common/vp9_common.h"
  21 #include "vp9/common/vp9_entropy.h"
  22 #include "vp9/common/vp9_entropymode.h"
  23 #include "vp9/common/vp9_mvref_common.h"
  24 #include "vp9/common/vp9_pred_common.h"
  25 #include "vp9/common/vp9_quant_common.h"
  26 #include "vp9/common/vp9_reconinter.h"
  27 #include "vp9/common/vp9_reconintra.h"
  28 #include "vp9/common/vp9_seg_common.h"
  29 #include "vp9/common/vp9_systemdependent.h"
  30
  31 #include "vp9/encoder/vp9_cost.h"
  32 #include "vp9/encoder/vp9_encodemb.h"
  33 #include "vp9/encoder/vp9_encodemv.h"
  34 #include "vp9/encoder/vp9_encoder.h"
  35 #include "vp9/encoder/vp9_mcomp.h"
  36 #include "vp9/encoder/vp9_quantize.h"
  37 #include "vp9/encoder/vp9_ratectrl.h"
  38 #include "vp9/encoder/vp9_rd.h"
  39 #include "vp9/encoder/vp9_tokenize.h"
  40 #include "vp9/encoder/vp9_variance.h"
  41
  42 #define RD_THRESH_POW      1.25
  43 #define RD_MULT_EPB_RATIO  64
  44
  45 // Factor to weigh the rate for switchable interp filters.
  46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
  47
  48 void vp9_rd_cost_reset(RD_COST *rd_cost) {
  49   rd_cost->rate = INT_MAX;
  50   rd_cost->dist = INT64_MAX;
  51   rd_cost->rdcost = INT64_MAX;
  52 }
  53
  54 void vp9_rd_cost_init(RD_COST *rd_cost) {
  55   rd_cost->rate = 0;
  56   rd_cost->dist = 0;
  57   rd_cost->rdcost = 0;
  58 }
  59
  60 // The baseline rd thresholds for breaking out of the rd loop for
  61 // certain modes are assumed to be based on 8x8 blocks.
  62 // This table is used to correct for block size.
  63 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
  64 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  65   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
  66 };
  67
  68 static void fill_mode_costs(VP9_COMP *cpi) {
  69   const FRAME_CONTEXT *const fc = cpi->common.fc;
  70   int i, j;
  71
  72   for (i = 0; i < INTRA_MODES; ++i)
  73     for (j = 0; j < INTRA_MODES; ++j)
  74       vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
  75                       vp9_intra_mode_tree);
  76
  77   vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  78   vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
  79                   vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  80   vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
  81                   fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  82
  83   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
  84     vp9_cost_tokens(cpi->switchable_interp_costs[i],
  85                     fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  86 }
  87
  88 static void fill_token_costs(vp9_coeff_cost *c,
  89                              vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  90   int i, j, k, l;
  91   TX_SIZE t;
  92   for (t = TX_4X4; t <= TX_32X32; ++t)
  93     for (i = 0; i < PLANE_TYPES; ++i)
  94       for (j = 0; j < REF_TYPES; ++j)
  95         for (k = 0; k < COEF_BANDS; ++k)
  96           for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
  97             vp9_prob probs[ENTROPY_NODES];
  98             vp9_model_to_full_probs(p[t][i][j][k][l], probs);
  99             vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
 100                             vp9_coef_tree);
 101             vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
 102                                  vp9_coef_tree);
 103             assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
 104                    c[t][i][j][k][1][l][EOB_TOKEN]);
 105           }
 106 }
 107
 108 // Values are now correlated to quantizer.
 109 static int sad_per_bit16lut_8[QINDEX_RANGE];
 110 static int sad_per_bit4lut_8[QINDEX_RANGE];
 111
 112 #if CONFIG_VP9_HIGHBITDEPTH
 113 static int sad_per_bit16lut_10[QINDEX_RANGE];
 114 static int sad_per_bit4lut_10[QINDEX_RANGE];
 115 static int sad_per_bit16lut_12[QINDEX_RANGE];
 116 static int sad_per_bit4lut_12[QINDEX_RANGE];
 117 #endif
 118
 119 static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
 120                             vpx_bit_depth_t bit_depth) {
 121   int i;
 122   // Initialize the sad lut tables using a formulaic calculation for now.
 123   // This is to make it easier to resolve the impact of experimental changes
 124   // to the quantizer tables.
 125   for (i = 0; i < range; i++) {
 126     const double q = vp9_convert_qindex_to_q(i, bit_depth);
 127     bit16lut[i] = (int)(0.0418 * q + 2.4107);
 128     bit4lut[i] = (int)(0.063 * q + 2.742);
 129   }
 130 }
 131
 132 void vp9_init_me_luts(void) {
 133   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
 134                   VPX_BITS_8);
 135 #if CONFIG_VP9_HIGHBITDEPTH
 136   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
 137                   VPX_BITS_10);
 138   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
 139                   VPX_BITS_12);
 140 #endif
 141 }
 142
 143 static const int rd_boost_factor[16] = {
 144   64, 32, 32, 32, 24, 16, 12, 12,
 145   8, 8, 4, 4, 2, 2, 1, 0
 146 };
 147 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
 148   128, 144, 128, 128, 144
 149 };
 150
 151 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
 152   const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
 153 #if CONFIG_VP9_HIGHBITDEPTH
 154   int64_t rdmult = 0;
 155   switch (cpi->common.bit_depth) {
 156     case VPX_BITS_8:
 157       rdmult = 88 * q * q / 24;
 158       break;
 159     case VPX_BITS_10:
 160       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
 161       break;
 162     case VPX_BITS_12:
 163       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
 164       break;
 165     default:
 166       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 167       return -1;
 168   }
 169 #else
 170   int64_t rdmult = 88 * q * q / 24;
 171 #endif  // CONFIG_VP9_HIGHBITDEPTH
 172   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
 173     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 174     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
 175     const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
 176
 177     rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
 178     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
 179   }
 180   return (int)rdmult;
 181 }
 182
 183 static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
 184   double q;
 185 #if CONFIG_VP9_HIGHBITDEPTH
 186   switch (bit_depth) {
 187     case VPX_BITS_8:
 188       q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 189       break;
 190     case VPX_BITS_10:
 191       q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
 192       break;
 193     case VPX_BITS_12:
 194       q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
 195       break;
 196     default:
 197       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 198       return -1;
 199   }
 200 #else
 201   (void) bit_depth;
 202   q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 203 #endif  // CONFIG_VP9_HIGHBITDEPTH
 204   // TODO(debargha): Adjust the function below.
 205   return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
 206 }
 207
 208 void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
 209 #if CONFIG_VP9_HIGHBITDEPTH
 210   switch (cpi->common.bit_depth) {
 211     case VPX_BITS_8:
 212       x->sadperbit16 = sad_per_bit16lut_8[qindex];
 213       x->sadperbit4 = sad_per_bit4lut_8[qindex];
 214       break;
 215     case VPX_BITS_10:
 216       x->sadperbit16 = sad_per_bit16lut_10[qindex];
 217       x->sadperbit4 = sad_per_bit4lut_10[qindex];
 218       break;
 219     case VPX_BITS_12:
 220       x->sadperbit16 = sad_per_bit16lut_12[qindex];
 221       x->sadperbit4 = sad_per_bit4lut_12[qindex];
 222       break;
 223     default:
 224       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 225   }
 226 #else
 227   (void)cpi;
 228   x->sadperbit16 = sad_per_bit16lut_8[qindex];
 229   x->sadperbit4 = sad_per_bit4lut_8[qindex];
 230 #endif  // CONFIG_VP9_HIGHBITDEPTH
 231 }
 232
 233 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
 234   int i, bsize, segment_id;
 235
 236   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
 237     const int qindex =
 238         clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
 239               cm->y_dc_delta_q, 0, MAXQ);
 240     const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
 241
 242     for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
 243       // Threshold here seems unnecessarily harsh but fine given actual
 244       // range of values used for cpi->sf.thresh_mult[].
 245       const int t = q * rd_thresh_block_size_factor[bsize];
 246       const int thresh_max = INT_MAX / t;
 247
 248       if (bsize >= BLOCK_8X8) {
 249         for (i = 0; i < MAX_MODES; ++i)
 250           rd->threshes[segment_id][bsize][i] =
 251               rd->thresh_mult[i] < thresh_max
 252                   ? rd->thresh_mult[i] * t / 4
 253                   : INT_MAX;
 254       } else {
 255         for (i = 0; i < MAX_REFS; ++i)
 256           rd->threshes[segment_id][bsize][i] =
 257               rd->thresh_mult_sub8x8[i] < thresh_max
 258                   ? rd->thresh_mult_sub8x8[i] * t / 4
 259                   : INT_MAX;
 260       }
 261     }
 262   }
 263 }
 264
 265 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
 266   VP9_COMMON *const cm = &cpi->common;
 267   MACROBLOCK *const x = &cpi->td.mb;
 268   RD_OPT *const rd = &cpi->rd;
 269   int i;
 270
 271   vp9_clear_system_state();
 272
 273   rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
 274   rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 275
 276   x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
 277   x->errorperbit += (x->errorperbit == 0);
 278
 279   x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
 280                        cm->frame_type != KEY_FRAME) ? 0 : 1;
 281
 282   set_block_thresholds(cm, rd);
 283
 284   if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
 285     fill_token_costs(x->token_costs, cm->fc->coef_probs);
 286
 287   if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
 288       cm->frame_type == KEY_FRAME) {
 289     for (i = 0; i < PARTITION_CONTEXTS; ++i)
 290       vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
 291                       vp9_partition_tree);
 292   }
 293
 294   if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
 295       cm->frame_type == KEY_FRAME) {
 296     fill_mode_costs(cpi);
 297
 298     if (!frame_is_intra_only(cm)) {
 299       vp9_build_nmv_cost_table(x->nmvjointcost,
 300                                cm->allow_high_precision_mv ? x->nmvcost_hp
 301                                                            : x->nmvcost,
 302                                &cm->fc->nmvc, cm->allow_high_precision_mv);
 303
 304       for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
 305         vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
 306                         cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
 307     }
 308   }
 309 }
 310
 311 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
 312   // NOTE: The tables below must be of the same size.
 313
 314   // The functions described below are sampled at the four most significant
 315   // bits of x^2 + 8 / 256.
 316
 317   // Normalized rate:
 318   // This table models the rate for a Laplacian source with given variance
 319   // when quantized with a uniform quantizer with given stepsize. The
 320   // closed form expression is:
 321   // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
 322   // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
 323   // and H(x) is the binary entropy function.
 324   static const int rate_tab_q10[] = {
 325     65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
 326      4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
 327      3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
 328      3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
 329      2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
 330      2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
 331      1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
 332      1159,  1086,  1021,   963,   911,   864,   821,   781,
 333       745,   680,   623,   574,   530,   490,   455,   424,
 334       395,   345,   304,   269,   239,   213,   190,   171,
 335       154,   126,   104,    87,    73,    61,    52,    44,
 336        38,    28,    21,    16,    12,    10,     8,     6,
 337         5,     3,     2,     1,     1,     1,     0,     0,
 338   };
 339   // Normalized distortion:
 340   // This table models the normalized distortion for a Laplacian source
 341   // with given variance when quantized with a uniform quantizer
 342   // with given stepsize. The closed form expression is:
 343   // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
 344   // where x = qpstep / sqrt(variance).
 345   // Note the actual distortion is Dn * variance.
 346   static const int dist_tab_q10[] = {
 347        0,     0,     1,     1,     1,     2,     2,     2,
 348        3,     3,     4,     5,     5,     6,     7,     7,
 349        8,     9,    11,    12,    13,    15,    16,    17,
 350       18,    21,    24,    26,    29,    31,    34,    36,
 351       39,    44,    49,    54,    59,    64,    69,    73,
 352       78,    88,    97,   106,   115,   124,   133,   142,
 353      151,   167,   184,   200,   215,   231,   245,   260,
 354      274,   301,   327,   351,   375,   397,   418,   439,
 355      458,   495,   528,   559,   587,   613,   637,   659,
 356      680,   717,   749,   777,   801,   823,   842,   859,
 357      874,   899,   919,   936,   949,   960,   969,   977,
 358      983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
 359     1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
 360   };
 361   static const int xsq_iq_q10[] = {
 362          0,      4,      8,     12,     16,     20,     24,     28,
 363         32,     40,     48,     56,     64,     72,     80,     88,
 364         96,    112,    128,    144,    160,    176,    192,    208,
 365        224,    256,    288,    320,    352,    384,    416,    448,
 366        480,    544,    608,    672,    736,    800,    864,    928,
 367        992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
 368       2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
 369       4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
 370       8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
 371      16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
 372      32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
 373      65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
 374     131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
 375   };
 376   const int tmp = (xsq_q10 >> 2) + 8;
 377   const int k = get_msb(tmp) - 3;
 378   const int xq = (k << 3) + ((tmp >> k) & 0x7);
 379   const int one_q10 = 1 << 10;
 380   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
 381   const int b_q10 = one_q10 - a_q10;
 382   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
 383   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
 384 }
 385
 386 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
 387                                   unsigned int qstep, int *rate,
 388                                   int64_t *dist) {
 389   // This function models the rate and distortion for a Laplacian
 390   // source with given variance when quantized with a uniform quantizer
 391   // with given stepsize. The closed form expressions are in:
 392   // Hang and Chen, "Source Model for transform video coder and its
 393   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
 394   // Sys. for Video Tech., April 1997.
 395   if (var == 0) {
 396     *rate = 0;
 397     *dist = 0;
 398   } else {
 399     int d_q10, r_q10;
 400     static const uint32_t MAX_XSQ_Q10 = 245727;
 401     const uint64_t xsq_q10_64 =
 402         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
 403     const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10);
 404     model_rd_norm(xsq_q10, &r_q10, &d_q10);
 405     *rate = ((r_q10 << n_log2) + 2) >> 2;
 406     *dist = (var * (int64_t)d_q10 + 512) >> 10;
 407   }
 408 }
 409
 410 void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
 411                               const struct macroblockd_plane *pd,
 412                               ENTROPY_CONTEXT t_above[16],
 413                               ENTROPY_CONTEXT t_left[16]) {
 414   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
 415   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
 416   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
 417   const ENTROPY_CONTEXT *const above = pd->above_context;
 418   const ENTROPY_CONTEXT *const left = pd->left_context;
 419
 420   int i;
 421   switch (tx_size) {
 422     case TX_4X4:
 423       memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
 424       memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
 425       break;
 426     case TX_8X8:
 427       for (i = 0; i < num_4x4_w; i += 2)
 428         t_above[i] = !!*(const uint16_t *)&above[i];
 429       for (i = 0; i < num_4x4_h; i += 2)
 430         t_left[i] = !!*(const uint16_t *)&left[i];
 431       break;
 432     case TX_16X16:
 433       for (i = 0; i < num_4x4_w; i += 4)
 434         t_above[i] = !!*(const uint32_t *)&above[i];
 435       for (i = 0; i < num_4x4_h; i += 4)
 436         t_left[i] = !!*(const uint32_t *)&left[i];
 437       break;
 438     case TX_32X32:
 439       for (i = 0; i < num_4x4_w; i += 8)
 440         t_above[i] = !!*(const uint64_t *)&above[i];
 441       for (i = 0; i < num_4x4_h; i += 8)
 442         t_left[i] = !!*(const uint64_t *)&left[i];
 443       break;
 444     default:
 445       assert(0 && "Invalid transform size.");
 446       break;
 447   }
 448 }
 449
 450 void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
 451                  uint8_t *ref_y_buffer, int ref_y_stride,
 452                  int ref_frame, BLOCK_SIZE block_size) {
 453   MACROBLOCKD *xd = &x->e_mbd;
 454   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 455   int i;
 456   int zero_seen = 0;
 457   int best_index = 0;
 458   int best_sad = INT_MAX;
 459   int this_sad = INT_MAX;
 460   int max_mv = 0;
 461   int near_same_nearest;
 462   uint8_t *src_y_ptr = x->plane[0].src.buf;
 463   uint8_t *ref_y_ptr;
 464   const int num_mv_refs = MAX_MV_REF_CANDIDATES +
 465                     (cpi->sf.adaptive_motion_search &&
 466                      block_size < x->max_partition_size);
 467
 468   MV pred_mv[3];
 469   pred_mv[0] = mbmi->ref_mvs[ref_frame][0].as_mv;
 470   pred_mv[1] = mbmi->ref_mvs[ref_frame][1].as_mv;
 471   pred_mv[2] = x->pred_mv[ref_frame];
 472   assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
 473
 474   near_same_nearest =
 475       mbmi->ref_mvs[ref_frame][0].as_int == mbmi->ref_mvs[ref_frame][1].as_int;
 476   // Get the sad for each candidate reference mv.
 477   for (i = 0; i < num_mv_refs; ++i) {
 478     const MV *this_mv = &pred_mv[i];
 479     int fp_row, fp_col;
 480
 481     if (i == 1 && near_same_nearest)
 482       continue;
 483     fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
 484     fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
 485     max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
 486
 487     if (fp_row ==0 && fp_col == 0 && zero_seen)
 488       continue;
 489     zero_seen |= (fp_row ==0 && fp_col == 0);
 490
 491     ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col];
 492     // Find sad for current vector.
 493     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
 494                                            ref_y_ptr, ref_y_stride);
 495     // Note if it is the best so far.
 496     if (this_sad < best_sad) {
 497       best_sad = this_sad;
 498       best_index = i;
 499     }
 500   }
 501
 502   // Note the index of the mv that worked best in the reference list.
 503   x->mv_best_ref_index[ref_frame] = best_index;
 504   x->max_mv_context[ref_frame] = max_mv;
 505   x->pred_mv_sad[ref_frame] = best_sad;
 506 }
 507
 508 void vp9_setup_pred_block(const MACROBLOCKD *xd,
 509                           struct buf_2d dst[MAX_MB_PLANE],
 510                           const YV12_BUFFER_CONFIG *src,
 511                           int mi_row, int mi_col,
 512                           const struct scale_factors *scale,
 513                           const struct scale_factors *scale_uv) {
 514   int i;
 515
 516   dst[0].buf = src->y_buffer;
 517   dst[0].stride = src->y_stride;
 518   dst[1].buf = src->u_buffer;
 519   dst[2].buf = src->v_buffer;
 520   dst[1].stride = dst[2].stride = src->uv_stride;
 521
 522   for (i = 0; i < MAX_MB_PLANE; ++i) {
 523     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
 524                      i ? scale_uv : scale,
 525                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
 526   }
 527 }
 528
 529 int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
 530                             int raster_block, int stride) {
 531   const int bw = b_width_log2_lookup[plane_bsize];
 532   const int y = 4 * (raster_block >> bw);
 533   const int x = 4 * (raster_block & ((1 << bw) - 1));
 534   return y * stride + x;
 535 }
 536
 537 int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
 538                                        int raster_block, int16_t *base) {
 539   const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 540   return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
 541 }
 542
 543 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
 544                                              int ref_frame) {
 545   const VP9_COMMON *const cm = &cpi->common;
 546   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
 547   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
 548   return
 549       (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
 550           &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
 551 }
 552
 553 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
 554   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 555   const int ctx = vp9_get_pred_context_switchable_interp(xd);
 556   return SWITCHABLE_INTERP_RATE_FACTOR *
 557              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
 558 }
 559
 560 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
 561   int i;
 562   RD_OPT *const rd = &cpi->rd;
 563   SPEED_FEATURES *const sf = &cpi->sf;
 564
 565   // Set baseline threshold values.
 566   for (i = 0; i < MAX_MODES; ++i)
 567     rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
 568
 569   if (sf->adaptive_rd_thresh) {
 570     rd->thresh_mult[THR_NEARESTMV] = 300;
 571     rd->thresh_mult[THR_NEARESTG] = 300;
 572     rd->thresh_mult[THR_NEARESTA] = 300;
 573   } else {
 574     rd->thresh_mult[THR_NEARESTMV] = 0;
 575     rd->thresh_mult[THR_NEARESTG] = 0;
 576     rd->thresh_mult[THR_NEARESTA] = 0;
 577   }
 578
 579   rd->thresh_mult[THR_DC] += 1000;
 580
 581   rd->thresh_mult[THR_NEWMV] += 1000;
 582   rd->thresh_mult[THR_NEWA] += 1000;
 583   rd->thresh_mult[THR_NEWG] += 1000;
 584
 585   rd->thresh_mult[THR_NEARMV] += 1000;
 586   rd->thresh_mult[THR_NEARA] += 1000;
 587   rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
 588   rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
 589
 590   rd->thresh_mult[THR_TM] += 1000;
 591
 592   rd->thresh_mult[THR_COMP_NEARLA] += 1500;
 593   rd->thresh_mult[THR_COMP_NEWLA] += 2000;
 594   rd->thresh_mult[THR_NEARG] += 1000;
 595   rd->thresh_mult[THR_COMP_NEARGA] += 1500;
 596   rd->thresh_mult[THR_COMP_NEWGA] += 2000;
 597
 598   rd->thresh_mult[THR_ZEROMV] += 2000;
 599   rd->thresh_mult[THR_ZEROG] += 2000;
 600   rd->thresh_mult[THR_ZEROA] += 2000;
 601   rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
 602   rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
 603
 604   rd->thresh_mult[THR_H_PRED] += 2000;
 605   rd->thresh_mult[THR_V_PRED] += 2000;
 606   rd->thresh_mult[THR_D45_PRED ] += 2500;
 607   rd->thresh_mult[THR_D135_PRED] += 2500;
 608   rd->thresh_mult[THR_D117_PRED] += 2500;
 609   rd->thresh_mult[THR_D153_PRED] += 2500;
 610   rd->thresh_mult[THR_D207_PRED] += 2500;
 611   rd->thresh_mult[THR_D63_PRED] += 2500;
 612 }
 613
 614 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
 615   static const int thresh_mult[2][MAX_REFS] =
 616       {{2500, 2500, 2500, 4500, 4500, 2500},
 617        {2000, 2000, 2000, 4000, 4000, 2000}};
 618   RD_OPT *const rd = &cpi->rd;
 619   const int idx = cpi->oxcf.mode == BEST;
 620   memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
 621 }
 622
 623 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
 624                                int bsize, int best_mode_index) {
 625   if (rd_thresh > 0) {
 626     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
 627     int mode;
 628     for (mode = 0; mode < top_mode; ++mode) {
 629       const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
 630       const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
 631       BLOCK_SIZE bs;
 632       for (bs = min_size; bs <= max_size; ++bs) {
 633         int *const fact = &factor_buf[bs][mode];
 634         if (mode == best_mode_index) {
 635           *fact -= (*fact >> 4);
 636         } else {
 637           *fact = MIN(*fact + RD_THRESH_INC,
 638                       rd_thresh * RD_THRESH_MAX_FACT);
 639         }
 640       }
 641     }
 642   }
 643 }
 644
 645 int vp9_get_intra_cost_penalty(int qindex, int qdelta,
 646                                vpx_bit_depth_t bit_depth) {
 647   const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
 648 #if CONFIG_VP9_HIGHBITDEPTH
 649   switch (bit_depth) {
 650     case VPX_BITS_8:
 651       return 20 * q;
 652     case VPX_BITS_10:
 653       return 5 * q;
 654     case VPX_BITS_12:
 655       return ROUND_POWER_OF_TWO(5 * q, 2);
 656     default:
 657       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 658       return -1;
 659   }
 660 #else
 661   return 20 * q;
 662 #endif  // CONFIG_VP9_HIGHBITDEPTH
 663 }
 664