granicus.if.org Git - libvpx/blob - vp10/encoder/rd.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14
  15 #include "./vp10_rtcd.h"
  16
  17 #include "vpx_mem/vpx_mem.h"
  18 #include "vpx_ports/bitops.h"
  19 #include "vpx_ports/mem.h"
  20 #include "vpx_ports/system_state.h"
  21
  22 #include "vp10/common/common.h"
  23 #include "vp10/common/entropy.h"
  24 #include "vp10/common/entropymode.h"
  25 #include "vp10/common/mvref_common.h"
  26 #include "vp10/common/pred_common.h"
  27 #include "vp10/common/quant_common.h"
  28 #include "vp10/common/reconinter.h"
  29 #include "vp10/common/reconintra.h"
  30 #include "vp10/common/seg_common.h"
  31
  32 #include "vp10/encoder/cost.h"
  33 #include "vp10/encoder/encodemb.h"
  34 #include "vp10/encoder/encodemv.h"
  35 #include "vp10/encoder/encoder.h"
  36 #include "vp10/encoder/mcomp.h"
  37 #include "vp10/encoder/quantize.h"
  38 #include "vp10/encoder/ratectrl.h"
  39 #include "vp10/encoder/rd.h"
  40 #include "vp10/encoder/tokenize.h"
  41
  42 #define RD_THRESH_POW      1.25
  43 #define RD_MULT_EPB_RATIO  64
  44
  45 // Factor to weigh the rate for switchable interp filters.
  46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
  47
  48 void vp10_rd_cost_reset(RD_COST *rd_cost) {
  49   rd_cost->rate = INT_MAX;
  50   rd_cost->dist = INT64_MAX;
  51   rd_cost->rdcost = INT64_MAX;
  52 }
  53
  54 void vp10_rd_cost_init(RD_COST *rd_cost) {
  55   rd_cost->rate = 0;
  56   rd_cost->dist = 0;
  57   rd_cost->rdcost = 0;
  58 }
  59
  60 // The baseline rd thresholds for breaking out of the rd loop for
  61 // certain modes are assumed to be based on 8x8 blocks.
  62 // This table is used to correct for block size.
  63 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
  64 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  65   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
  66 };
  67
  68 static void fill_mode_costs(VP10_COMP *cpi) {
  69   const FRAME_CONTEXT *const fc = cpi->common.fc;
  70   int i, j;
  71
  72   for (i = 0; i < INTRA_MODES; ++i)
  73     for (j = 0; j < INTRA_MODES; ++j)
  74       vp10_cost_tokens(cpi->y_mode_costs[i][j], vp10_kf_y_mode_prob[i][j],
  75                       vp10_intra_mode_tree);
  76
  77   vp10_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp10_intra_mode_tree);
  78   vp10_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
  79                   vp10_kf_uv_mode_prob[TM_PRED], vp10_intra_mode_tree);
  80   vp10_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
  81                   fc->uv_mode_prob[TM_PRED], vp10_intra_mode_tree);
  82
  83   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
  84     vp10_cost_tokens(cpi->switchable_interp_costs[i],
  85                     fc->switchable_interp_prob[i], vp10_switchable_interp_tree);
  86 }
  87
  88 static void fill_token_costs(vp10_coeff_cost *c,
  89                              vp10_coeff_probs_model (*p)[PLANE_TYPES]) {
  90   int i, j, k, l;
  91   TX_SIZE t;
  92   for (t = TX_4X4; t <= TX_32X32; ++t)
  93     for (i = 0; i < PLANE_TYPES; ++i)
  94       for (j = 0; j < REF_TYPES; ++j)
  95         for (k = 0; k < COEF_BANDS; ++k)
  96           for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
  97             vpx_prob probs[ENTROPY_NODES];
  98             vp10_model_to_full_probs(p[t][i][j][k][l], probs);
  99             vp10_cost_tokens((int *)c[t][i][j][k][0][l], probs,
 100                             vp10_coef_tree);
 101             vp10_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
 102                                  vp10_coef_tree);
 103             assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
 104                    c[t][i][j][k][1][l][EOB_TOKEN]);
 105           }
 106 }
 107
 108 // Values are now correlated to quantizer.
 109 static int sad_per_bit16lut_8[QINDEX_RANGE];
 110 static int sad_per_bit4lut_8[QINDEX_RANGE];
 111
 112 #if CONFIG_VP9_HIGHBITDEPTH
 113 static int sad_per_bit16lut_10[QINDEX_RANGE];
 114 static int sad_per_bit4lut_10[QINDEX_RANGE];
 115 static int sad_per_bit16lut_12[QINDEX_RANGE];
 116 static int sad_per_bit4lut_12[QINDEX_RANGE];
 117 #endif
 118
 119 static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
 120                             vpx_bit_depth_t bit_depth) {
 121   int i;
 122   // Initialize the sad lut tables using a formulaic calculation for now.
 123   // This is to make it easier to resolve the impact of experimental changes
 124   // to the quantizer tables.
 125   for (i = 0; i < range; i++) {
 126     const double q = vp10_convert_qindex_to_q(i, bit_depth);
 127     bit16lut[i] = (int)(0.0418 * q + 2.4107);
 128     bit4lut[i] = (int)(0.063 * q + 2.742);
 129   }
 130 }
 131
 132 void vp10_init_me_luts(void) {
 133   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
 134                   VPX_BITS_8);
 135 #if CONFIG_VP9_HIGHBITDEPTH
 136   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
 137                   VPX_BITS_10);
 138   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
 139                   VPX_BITS_12);
 140 #endif
 141 }
 142
 143 static const int rd_boost_factor[16] = {
 144   64, 32, 32, 32, 24, 16, 12, 12,
 145   8, 8, 4, 4, 2, 2, 1, 0
 146 };
 147 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
 148   128, 144, 128, 128, 144
 149 };
 150
 151 int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
 152   const int64_t q = vp10_dc_quant(qindex, 0, cpi->common.bit_depth);
 153 #if CONFIG_VP9_HIGHBITDEPTH
 154   int64_t rdmult = 0;
 155   switch (cpi->common.bit_depth) {
 156     case VPX_BITS_8:
 157       rdmult = 88 * q * q / 24;
 158       break;
 159     case VPX_BITS_10:
 160       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
 161       break;
 162     case VPX_BITS_12:
 163       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
 164       break;
 165     default:
 166       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 167       return -1;
 168   }
 169 #else
 170   int64_t rdmult = 88 * q * q / 24;
 171 #endif  // CONFIG_VP9_HIGHBITDEPTH
 172   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
 173     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 174     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
 175     const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
 176
 177     rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
 178     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
 179   }
 180   if (rdmult < 1)
 181     rdmult = 1;
 182   return (int)rdmult;
 183 }
 184
 185 static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
 186   double q;
 187 #if CONFIG_VP9_HIGHBITDEPTH
 188   switch (bit_depth) {
 189     case VPX_BITS_8:
 190       q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 191       break;
 192     case VPX_BITS_10:
 193       q = vp10_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
 194       break;
 195     case VPX_BITS_12:
 196       q = vp10_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
 197       break;
 198     default:
 199       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 200       return -1;
 201   }
 202 #else
 203   (void) bit_depth;
 204   q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
 205 #endif  // CONFIG_VP9_HIGHBITDEPTH
 206   // TODO(debargha): Adjust the function below.
 207   return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
 208 }
 209
 210 void vp10_initialize_me_consts(VP10_COMP *cpi, MACROBLOCK *x, int qindex) {
 211 #if CONFIG_VP9_HIGHBITDEPTH
 212   switch (cpi->common.bit_depth) {
 213     case VPX_BITS_8:
 214       x->sadperbit16 = sad_per_bit16lut_8[qindex];
 215       x->sadperbit4 = sad_per_bit4lut_8[qindex];
 216       break;
 217     case VPX_BITS_10:
 218       x->sadperbit16 = sad_per_bit16lut_10[qindex];
 219       x->sadperbit4 = sad_per_bit4lut_10[qindex];
 220       break;
 221     case VPX_BITS_12:
 222       x->sadperbit16 = sad_per_bit16lut_12[qindex];
 223       x->sadperbit4 = sad_per_bit4lut_12[qindex];
 224       break;
 225     default:
 226       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 227   }
 228 #else
 229   (void)cpi;
 230   x->sadperbit16 = sad_per_bit16lut_8[qindex];
 231   x->sadperbit4 = sad_per_bit4lut_8[qindex];
 232 #endif  // CONFIG_VP9_HIGHBITDEPTH
 233 }
 234
 235 static void set_block_thresholds(const VP10_COMMON *cm, RD_OPT *rd) {
 236   int i, bsize, segment_id;
 237
 238   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
 239     const int qindex =
 240         clamp(vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
 241               cm->y_dc_delta_q, 0, MAXQ);
 242     const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
 243
 244     for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
 245       // Threshold here seems unnecessarily harsh but fine given actual
 246       // range of values used for cpi->sf.thresh_mult[].
 247       const int t = q * rd_thresh_block_size_factor[bsize];
 248       const int thresh_max = INT_MAX / t;
 249
 250       if (bsize >= BLOCK_8X8) {
 251         for (i = 0; i < MAX_MODES; ++i)
 252           rd->threshes[segment_id][bsize][i] =
 253               rd->thresh_mult[i] < thresh_max
 254                   ? rd->thresh_mult[i] * t / 4
 255                   : INT_MAX;
 256       } else {
 257         for (i = 0; i < MAX_REFS; ++i)
 258           rd->threshes[segment_id][bsize][i] =
 259               rd->thresh_mult_sub8x8[i] < thresh_max
 260                   ? rd->thresh_mult_sub8x8[i] * t / 4
 261                   : INT_MAX;
 262       }
 263     }
 264   }
 265 }
 266
 267 void vp10_initialize_rd_consts(VP10_COMP *cpi) {
 268   VP10_COMMON *const cm = &cpi->common;
 269   MACROBLOCK *const x = &cpi->td.mb;
 270   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
 271   RD_OPT *const rd = &cpi->rd;
 272   int i;
 273
 274   vpx_clear_system_state();
 275
 276   rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
 277   rd->RDMULT = vp10_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 278
 279   x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
 280   x->errorperbit += (x->errorperbit == 0);
 281
 282   x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
 283                        cm->frame_type != KEY_FRAME) ? 0 : 1;
 284
 285   set_block_thresholds(cm, rd);
 286   set_partition_probs(cm, xd);
 287
 288   fill_token_costs(x->token_costs, cm->fc->coef_probs);
 289
 290   if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
 291       cm->frame_type == KEY_FRAME) {
 292     for (i = 0; i < PARTITION_CONTEXTS; ++i)
 293       vp10_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
 294                       vp10_partition_tree);
 295   }
 296
 297   fill_mode_costs(cpi);
 298
 299   if (!frame_is_intra_only(cm)) {
 300     vp10_build_nmv_cost_table(x->nmvjointcost,
 301                              cm->allow_high_precision_mv ? x->nmvcost_hp
 302                                                          : x->nmvcost,
 303                              &cm->fc->nmvc, cm->allow_high_precision_mv);
 304
 305     for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
 306       vp10_cost_tokens((int *)cpi->inter_mode_cost[i],
 307                       cm->fc->inter_mode_probs[i], vp10_inter_mode_tree);
 308   }
 309 }
 310
 311 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
 312   // NOTE: The tables below must be of the same size.
 313
 314   // The functions described below are sampled at the four most significant
 315   // bits of x^2 + 8 / 256.
 316
 317   // Normalized rate:
 318   // This table models the rate for a Laplacian source with given variance
 319   // when quantized with a uniform quantizer with given stepsize. The
 320   // closed form expression is:
 321   // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
 322   // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
 323   // and H(x) is the binary entropy function.
 324   static const int rate_tab_q10[] = {
 325     65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
 326      4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
 327      3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
 328      3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
 329      2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
 330      2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
 331      1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
 332      1159,  1086,  1021,   963,   911,   864,   821,   781,
 333       745,   680,   623,   574,   530,   490,   455,   424,
 334       395,   345,   304,   269,   239,   213,   190,   171,
 335       154,   126,   104,    87,    73,    61,    52,    44,
 336        38,    28,    21,    16,    12,    10,     8,     6,
 337         5,     3,     2,     1,     1,     1,     0,     0,
 338   };
 339   // Normalized distortion:
 340   // This table models the normalized distortion for a Laplacian source
 341   // with given variance when quantized with a uniform quantizer
 342   // with given stepsize. The closed form expression is:
 343   // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
 344   // where x = qpstep / sqrt(variance).
 345   // Note the actual distortion is Dn * variance.
 346   static const int dist_tab_q10[] = {
 347        0,     0,     1,     1,     1,     2,     2,     2,
 348        3,     3,     4,     5,     5,     6,     7,     7,
 349        8,     9,    11,    12,    13,    15,    16,    17,
 350       18,    21,    24,    26,    29,    31,    34,    36,
 351       39,    44,    49,    54,    59,    64,    69,    73,
 352       78,    88,    97,   106,   115,   124,   133,   142,
 353      151,   167,   184,   200,   215,   231,   245,   260,
 354      274,   301,   327,   351,   375,   397,   418,   439,
 355      458,   495,   528,   559,   587,   613,   637,   659,
 356      680,   717,   749,   777,   801,   823,   842,   859,
 357      874,   899,   919,   936,   949,   960,   969,   977,
 358      983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
 359     1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
 360   };
 361   static const int xsq_iq_q10[] = {
 362          0,      4,      8,     12,     16,     20,     24,     28,
 363         32,     40,     48,     56,     64,     72,     80,     88,
 364         96,    112,    128,    144,    160,    176,    192,    208,
 365        224,    256,    288,    320,    352,    384,    416,    448,
 366        480,    544,    608,    672,    736,    800,    864,    928,
 367        992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
 368       2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
 369       4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
 370       8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
 371      16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
 372      32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
 373      65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
 374     131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
 375   };
 376   const int tmp = (xsq_q10 >> 2) + 8;
 377   const int k = get_msb(tmp) - 3;
 378   const int xq = (k << 3) + ((tmp >> k) & 0x7);
 379   const int one_q10 = 1 << 10;
 380   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
 381   const int b_q10 = one_q10 - a_q10;
 382   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
 383   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
 384 }
 385
 386 void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
 387                                   unsigned int qstep, int *rate,
 388                                   int64_t *dist) {
 389   // This function models the rate and distortion for a Laplacian
 390   // source with given variance when quantized with a uniform quantizer
 391   // with given stepsize. The closed form expressions are in:
 392   // Hang and Chen, "Source Model for transform video coder and its
 393   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
 394   // Sys. for Video Tech., April 1997.
 395   if (var == 0) {
 396     *rate = 0;
 397     *dist = 0;
 398   } else {
 399     int d_q10, r_q10;
 400     static const uint32_t MAX_XSQ_Q10 = 245727;
 401     const uint64_t xsq_q10_64 =
 402         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
 403     const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
 404     model_rd_norm(xsq_q10, &r_q10, &d_q10);
 405     *rate = ((r_q10 << n_log2) + 2) >> 2;
 406     *dist = (var * (int64_t)d_q10 + 512) >> 10;
 407   }
 408 }
 409
 410 void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
 411                               const struct macroblockd_plane *pd,
 412                               ENTROPY_CONTEXT t_above[16],
 413                               ENTROPY_CONTEXT t_left[16]) {
 414   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
 415   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
 416   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
 417   const ENTROPY_CONTEXT *const above = pd->above_context;
 418   const ENTROPY_CONTEXT *const left = pd->left_context;
 419
 420   int i;
 421   switch (tx_size) {
 422     case TX_4X4:
 423       memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
 424       memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
 425       break;
 426     case TX_8X8:
 427       for (i = 0; i < num_4x4_w; i += 2)
 428         t_above[i] = !!*(const uint16_t *)&above[i];
 429       for (i = 0; i < num_4x4_h; i += 2)
 430         t_left[i] = !!*(const uint16_t *)&left[i];
 431       break;
 432     case TX_16X16:
 433       for (i = 0; i < num_4x4_w; i += 4)
 434         t_above[i] = !!*(const uint32_t *)&above[i];
 435       for (i = 0; i < num_4x4_h; i += 4)
 436         t_left[i] = !!*(const uint32_t *)&left[i];
 437       break;
 438     case TX_32X32:
 439       for (i = 0; i < num_4x4_w; i += 8)
 440         t_above[i] = !!*(const uint64_t *)&above[i];
 441       for (i = 0; i < num_4x4_h; i += 8)
 442         t_left[i] = !!*(const uint64_t *)&left[i];
 443       break;
 444     default:
 445       assert(0 && "Invalid transform size.");
 446       break;
 447   }
 448 }
 449
 450 void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
 451                  uint8_t *ref_y_buffer, int ref_y_stride,
 452                  int ref_frame, BLOCK_SIZE block_size) {
 453   int i;
 454   int zero_seen = 0;
 455   int best_index = 0;
 456   int best_sad = INT_MAX;
 457   int this_sad = INT_MAX;
 458   int max_mv = 0;
 459   int near_same_nearest;
 460   uint8_t *src_y_ptr = x->plane[0].src.buf;
 461   uint8_t *ref_y_ptr;
 462   const int num_mv_refs = MAX_MV_REF_CANDIDATES +
 463                     (cpi->sf.adaptive_motion_search &&
 464                      block_size < x->max_partition_size);
 465
 466   MV pred_mv[3];
 467   pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
 468   pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
 469   pred_mv[2] = x->pred_mv[ref_frame];
 470   assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
 471
 472   near_same_nearest =
 473       x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
 474           x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
 475   // Get the sad for each candidate reference mv.
 476   for (i = 0; i < num_mv_refs; ++i) {
 477     const MV *this_mv = &pred_mv[i];
 478     int fp_row, fp_col;
 479
 480     if (i == 1 && near_same_nearest)
 481       continue;
 482     fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
 483     fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
 484     max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
 485
 486     if (fp_row ==0 && fp_col == 0 && zero_seen)
 487       continue;
 488     zero_seen |= (fp_row ==0 && fp_col == 0);
 489
 490     ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col];
 491     // Find sad for current vector.
 492     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
 493                                            ref_y_ptr, ref_y_stride);
 494     // Note if it is the best so far.
 495     if (this_sad < best_sad) {
 496       best_sad = this_sad;
 497       best_index = i;
 498     }
 499   }
 500
 501   // Note the index of the mv that worked best in the reference list.
 502   x->mv_best_ref_index[ref_frame] = best_index;
 503   x->max_mv_context[ref_frame] = max_mv;
 504   x->pred_mv_sad[ref_frame] = best_sad;
 505 }
 506
 507 void vp10_setup_pred_block(const MACROBLOCKD *xd,
 508                           struct buf_2d dst[MAX_MB_PLANE],
 509                           const YV12_BUFFER_CONFIG *src,
 510                           int mi_row, int mi_col,
 511                           const struct scale_factors *scale,
 512                           const struct scale_factors *scale_uv) {
 513   int i;
 514
 515   dst[0].buf = src->y_buffer;
 516   dst[0].stride = src->y_stride;
 517   dst[1].buf = src->u_buffer;
 518   dst[2].buf = src->v_buffer;
 519   dst[1].stride = dst[2].stride = src->uv_stride;
 520
 521   for (i = 0; i < MAX_MB_PLANE; ++i) {
 522     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
 523                      i ? scale_uv : scale,
 524                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
 525   }
 526 }
 527
 528 int vp10_raster_block_offset(BLOCK_SIZE plane_bsize,
 529                             int raster_block, int stride) {
 530   const int bw = b_width_log2_lookup[plane_bsize];
 531   const int y = 4 * (raster_block >> bw);
 532   const int x = 4 * (raster_block & ((1 << bw) - 1));
 533   return y * stride + x;
 534 }
 535
 536 int16_t* vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
 537                                        int raster_block, int16_t *base) {
 538   const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 539   return base + vp10_raster_block_offset(plane_bsize, raster_block, stride);
 540 }
 541
 542 YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const VP10_COMP *cpi,
 543                                              int ref_frame) {
 544   const VP10_COMMON *const cm = &cpi->common;
 545   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
 546   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
 547   return
 548       (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
 549           &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
 550 }
 551
 552 int vp10_get_switchable_rate(const VP10_COMP *cpi,
 553                              const MACROBLOCKD *const xd) {
 554   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 555   const int ctx = vp10_get_pred_context_switchable_interp(xd);
 556   return SWITCHABLE_INTERP_RATE_FACTOR *
 557              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
 558 }
 559
 560 void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) {
 561   int i;
 562   RD_OPT *const rd = &cpi->rd;
 563   SPEED_FEATURES *const sf = &cpi->sf;
 564
 565   // Set baseline threshold values.
 566   for (i = 0; i < MAX_MODES; ++i)
 567     rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
 568
 569   if (sf->adaptive_rd_thresh) {
 570     rd->thresh_mult[THR_NEARESTMV] = 300;
 571     rd->thresh_mult[THR_NEARESTG] = 300;
 572     rd->thresh_mult[THR_NEARESTA] = 300;
 573   } else {
 574     rd->thresh_mult[THR_NEARESTMV] = 0;
 575     rd->thresh_mult[THR_NEARESTG] = 0;
 576     rd->thresh_mult[THR_NEARESTA] = 0;
 577   }
 578
 579   rd->thresh_mult[THR_DC] += 1000;
 580
 581   rd->thresh_mult[THR_NEWMV] += 1000;
 582   rd->thresh_mult[THR_NEWA] += 1000;
 583   rd->thresh_mult[THR_NEWG] += 1000;
 584
 585   rd->thresh_mult[THR_NEARMV] += 1000;
 586   rd->thresh_mult[THR_NEARA] += 1000;
 587   rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
 588   rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
 589
 590   rd->thresh_mult[THR_TM] += 1000;
 591
 592   rd->thresh_mult[THR_COMP_NEARLA] += 1500;
 593   rd->thresh_mult[THR_COMP_NEWLA] += 2000;
 594   rd->thresh_mult[THR_NEARG] += 1000;
 595   rd->thresh_mult[THR_COMP_NEARGA] += 1500;
 596   rd->thresh_mult[THR_COMP_NEWGA] += 2000;
 597
 598   rd->thresh_mult[THR_ZEROMV] += 2000;
 599   rd->thresh_mult[THR_ZEROG] += 2000;
 600   rd->thresh_mult[THR_ZEROA] += 2000;
 601   rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
 602   rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
 603
 604   rd->thresh_mult[THR_H_PRED] += 2000;
 605   rd->thresh_mult[THR_V_PRED] += 2000;
 606   rd->thresh_mult[THR_D45_PRED ] += 2500;
 607   rd->thresh_mult[THR_D135_PRED] += 2500;
 608   rd->thresh_mult[THR_D117_PRED] += 2500;
 609   rd->thresh_mult[THR_D153_PRED] += 2500;
 610   rd->thresh_mult[THR_D207_PRED] += 2500;
 611   rd->thresh_mult[THR_D63_PRED] += 2500;
 612 }
 613
 614 void vp10_set_rd_speed_thresholds_sub8x8(VP10_COMP *cpi) {
 615   static const int thresh_mult[2][MAX_REFS] =
 616       {{2500, 2500, 2500, 4500, 4500, 2500},
 617        {2000, 2000, 2000, 4000, 4000, 2000}};
 618   RD_OPT *const rd = &cpi->rd;
 619   const int idx = cpi->oxcf.mode == BEST;
 620   memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
 621 }
 622
 623 void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
 624                                int bsize, int best_mode_index) {
 625   if (rd_thresh > 0) {
 626     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
 627     int mode;
 628     for (mode = 0; mode < top_mode; ++mode) {
 629       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
 630       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
 631       BLOCK_SIZE bs;
 632       for (bs = min_size; bs <= max_size; ++bs) {
 633         int *const fact = &factor_buf[bs][mode];
 634         if (mode == best_mode_index) {
 635           *fact -= (*fact >> 4);
 636         } else {
 637           *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
 638         }
 639       }
 640     }
 641   }
 642 }
 643
 644 int vp10_get_intra_cost_penalty(int qindex, int qdelta,
 645                                vpx_bit_depth_t bit_depth) {
 646   const int q = vp10_dc_quant(qindex, qdelta, bit_depth);
 647 #if CONFIG_VP9_HIGHBITDEPTH
 648   switch (bit_depth) {
 649     case VPX_BITS_8:
 650       return 20 * q;
 651     case VPX_BITS_10:
 652       return 5 * q;
 653     case VPX_BITS_12:
 654       return ROUND_POWER_OF_TWO(5 * q, 2);
 655     default:
 656       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
 657       return -1;
 658   }
 659 #else
 660   return 20 * q;
 661 #endif  // CONFIG_VP9_HIGHBITDEPTH
 662 }
 663