granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_rdopt.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <limits.h>
  13 #include <math.h>
  14 #include <stdio.h>
  15
  16 #include "./vp9_rtcd.h"
  17
  18 #include "vpx_mem/vpx_mem.h"
  19
  20 #include "vp9/common/vp9_common.h"
  21 #include "vp9/common/vp9_entropy.h"
  22 #include "vp9/common/vp9_entropymode.h"
  23 #include "vp9/common/vp9_idct.h"
  24 #include "vp9/common/vp9_mvref_common.h"
  25 #include "vp9/common/vp9_pragmas.h"
  26 #include "vp9/common/vp9_pred_common.h"
  27 #include "vp9/common/vp9_quant_common.h"
  28 #include "vp9/common/vp9_reconinter.h"
  29 #include "vp9/common/vp9_reconintra.h"
  30 #include "vp9/common/vp9_seg_common.h"
  31 #include "vp9/common/vp9_systemdependent.h"
  32
  33 #include "vp9/encoder/vp9_cost.h"
  34 #include "vp9/encoder/vp9_encodemb.h"
  35 #include "vp9/encoder/vp9_encodemv.h"
  36 #include "vp9/encoder/vp9_encoder.h"
  37 #include "vp9/encoder/vp9_mcomp.h"
  38 #include "vp9/encoder/vp9_quantize.h"
  39 #include "vp9/encoder/vp9_ratectrl.h"
  40 #include "vp9/encoder/vp9_rdopt.h"
  41 #include "vp9/encoder/vp9_tokenize.h"
  42 #include "vp9/encoder/vp9_variance.h"
  43
  44 #define RD_THRESH_MAX_FACT 64
  45 #define RD_THRESH_INC      1
  46 #define RD_THRESH_POW      1.25
  47 #define RD_MULT_EPB_RATIO  64
  48
  49 /* Factor to weigh the rate for switchable interp filters */
  50 #define SWITCHABLE_INTERP_RATE_FACTOR 1
  51
  52 #define LAST_FRAME_MODE_MASK    0xFFEDCD60
  53 #define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
  54 #define ALT_REF_MODE_MASK       0xFFC648D0
  55
  56 #define MIN_EARLY_TERM_INDEX    3
  57
  58 typedef struct {
  59   PREDICTION_MODE mode;
  60   MV_REFERENCE_FRAME ref_frame[2];
  61 } MODE_DEFINITION;
  62
  63 typedef struct {
  64   MV_REFERENCE_FRAME ref_frame[2];
  65 } REF_DEFINITION;
  66
  67 struct rdcost_block_args {
  68   MACROBLOCK *x;
  69   ENTROPY_CONTEXT t_above[16];
  70   ENTROPY_CONTEXT t_left[16];
  71   int rate;
  72   int64_t dist;
  73   int64_t sse;
  74   int this_rate;
  75   int64_t this_dist;
  76   int64_t this_sse;
  77   int64_t this_rd;
  78   int64_t best_rd;
  79   int skip;
  80   int use_fast_coef_costing;
  81   const scan_order *so;
  82 };
  83
  84 static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  85   {NEARESTMV, {LAST_FRAME,   NONE}},
  86   {NEARESTMV, {ALTREF_FRAME, NONE}},
  87   {NEARESTMV, {GOLDEN_FRAME, NONE}},
  88
  89   {DC_PRED,   {INTRA_FRAME,  NONE}},
  90
  91   {NEWMV,     {LAST_FRAME,   NONE}},
  92   {NEWMV,     {ALTREF_FRAME, NONE}},
  93   {NEWMV,     {GOLDEN_FRAME, NONE}},
  94
  95   {NEARMV,    {LAST_FRAME,   NONE}},
  96   {NEARMV,    {ALTREF_FRAME, NONE}},
  97   {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  98   {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
  99
 100   {TM_PRED,   {INTRA_FRAME,  NONE}},
 101
 102   {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
 103   {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
 104   {NEARMV,    {GOLDEN_FRAME, NONE}},
 105   {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
 106   {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},
 107
 108   {ZEROMV,    {LAST_FRAME,   NONE}},
 109   {ZEROMV,    {GOLDEN_FRAME, NONE}},
 110   {ZEROMV,    {ALTREF_FRAME, NONE}},
 111   {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
 112   {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
 113
 114   {H_PRED,    {INTRA_FRAME,  NONE}},
 115   {V_PRED,    {INTRA_FRAME,  NONE}},
 116   {D135_PRED, {INTRA_FRAME,  NONE}},
 117   {D207_PRED, {INTRA_FRAME,  NONE}},
 118   {D153_PRED, {INTRA_FRAME,  NONE}},
 119   {D63_PRED,  {INTRA_FRAME,  NONE}},
 120   {D117_PRED, {INTRA_FRAME,  NONE}},
 121   {D45_PRED,  {INTRA_FRAME,  NONE}},
 122 };
 123
 124 static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
 125   {{LAST_FRAME,   NONE}},
 126   {{GOLDEN_FRAME, NONE}},
 127   {{ALTREF_FRAME, NONE}},
 128   {{LAST_FRAME,   ALTREF_FRAME}},
 129   {{GOLDEN_FRAME, ALTREF_FRAME}},
 130   {{INTRA_FRAME,  NONE}},
 131 };
 132
 133 // The baseline rd thresholds for breaking out of the rd loop for
 134 // certain modes are assumed to be based on 8x8 blocks.
 135 // This table is used to correct for blocks size.
 136 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
 137 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
 138   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
 139 };
 140
 141 static int raster_block_offset(BLOCK_SIZE plane_bsize,
 142                                int raster_block, int stride) {
 143   const int bw = b_width_log2(plane_bsize);
 144   const int y = 4 * (raster_block >> bw);
 145   const int x = 4 * (raster_block & ((1 << bw) - 1));
 146   return y * stride + x;
 147 }
 148 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
 149                                           int raster_block, int16_t *base) {
 150   const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 151   return base + raster_block_offset(plane_bsize, raster_block, stride);
 152 }
 153
 154 static void fill_mode_costs(VP9_COMP *cpi) {
 155   const FRAME_CONTEXT *const fc = &cpi->common.fc;
 156   int i, j;
 157
 158   for (i = 0; i < INTRA_MODES; i++)
 159     for (j = 0; j < INTRA_MODES; j++)
 160       vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
 161                       vp9_intra_mode_tree);
 162
 163   // TODO(rbultje) separate tables for superblock costing?
 164   vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
 165   vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
 166                   vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
 167   vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
 168                   fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
 169
 170   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
 171     vp9_cost_tokens(cpi->switchable_interp_costs[i],
 172                     fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
 173 }
 174
 175 static void fill_token_costs(vp9_coeff_cost *c,
 176                              vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
 177   int i, j, k, l;
 178   TX_SIZE t;
 179   for (t = TX_4X4; t <= TX_32X32; ++t)
 180     for (i = 0; i < PLANE_TYPES; ++i)
 181       for (j = 0; j < REF_TYPES; ++j)
 182         for (k = 0; k < COEF_BANDS; ++k)
 183           for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
 184             vp9_prob probs[ENTROPY_NODES];
 185             vp9_model_to_full_probs(p[t][i][j][k][l], probs);
 186             vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
 187                             vp9_coef_tree);
 188             vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
 189                                  vp9_coef_tree);
 190             assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
 191                    c[t][i][j][k][1][l][EOB_TOKEN]);
 192           }
 193 }
 194
 195 static const uint8_t rd_iifactor[32] = {
 196   4, 4, 3, 2, 1, 0, 0, 0,
 197   0, 0, 0, 0, 0, 0, 0, 0,
 198   0, 0, 0, 0, 0, 0, 0, 0,
 199   0, 0, 0, 0, 0, 0, 0, 0,
 200 };
 201
 202 // 3* dc_qlookup[Q]*dc_qlookup[Q];
 203
 204 /* values are now correlated to quantizer */
 205 static int sad_per_bit16lut[QINDEX_RANGE];
 206 static int sad_per_bit4lut[QINDEX_RANGE];
 207
 208 void vp9_init_me_luts() {
 209   int i;
 210
 211   // Initialize the sad lut tables using a formulaic calculation for now
 212   // This is to make it easier to resolve the impact of experimental changes
 213   // to the quantizer tables.
 214   for (i = 0; i < QINDEX_RANGE; i++) {
 215     const double q = vp9_convert_qindex_to_q(i);
 216     sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
 217     sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
 218   }
 219 }
 220
 221 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
 222   const int q = vp9_dc_quant(qindex, 0);
 223   // TODO(debargha): Adjust the function below
 224   int rdmult = 88 * q * q / 25;
 225   if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
 226     if (cpi->twopass.next_iiratio > 31)
 227       rdmult += (rdmult * rd_iifactor[31]) >> 4;
 228     else
 229       rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
 230   }
 231   return rdmult;
 232 }
 233
 234 static int compute_rd_thresh_factor(int qindex) {
 235   // TODO(debargha): Adjust the function below
 236   const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
 237   return MAX(q, 8);
 238 }
 239
 240 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
 241   cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
 242   cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
 243 }
 244
 245 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
 246   int i, bsize, segment_id;
 247
 248   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
 249     const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
 250                                             cm->base_qindex) + cm->y_dc_delta_q,
 251                              0, MAXQ);
 252     const int q = compute_rd_thresh_factor(qindex);
 253
 254     for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
 255       // Threshold here seems unnecessarily harsh but fine given actual
 256       // range of values used for cpi->sf.thresh_mult[].
 257       const int t = q * rd_thresh_block_size_factor[bsize];
 258       const int thresh_max = INT_MAX / t;
 259
 260       if (bsize >= BLOCK_8X8) {
 261         for (i = 0; i < MAX_MODES; ++i)
 262           rd->threshes[segment_id][bsize][i] =
 263               rd->thresh_mult[i] < thresh_max
 264                   ? rd->thresh_mult[i] * t / 4
 265                   : INT_MAX;
 266       } else {
 267         for (i = 0; i < MAX_REFS; ++i)
 268           rd->threshes[segment_id][bsize][i] =
 269               rd->thresh_mult_sub8x8[i] < thresh_max
 270                   ? rd->thresh_mult_sub8x8[i] * t / 4
 271                   : INT_MAX;
 272       }
 273     }
 274   }
 275 }
 276
 277 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
 278   VP9_COMMON *const cm = &cpi->common;
 279   MACROBLOCK *const x = &cpi->mb;
 280   RD_OPT *const rd = &cpi->rd;
 281   int i;
 282
 283   vp9_clear_system_state();
 284
 285   rd->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
 286   rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 287
 288   x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
 289   x->errorperbit += (x->errorperbit == 0);
 290
 291   x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
 292                          cm->frame_type != KEY_FRAME) ? 0 : 1;
 293
 294   set_block_thresholds(cm, rd);
 295
 296   if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
 297     fill_token_costs(x->token_costs, cm->fc.coef_probs);
 298
 299     for (i = 0; i < PARTITION_CONTEXTS; i++)
 300       vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
 301                       vp9_partition_tree);
 302   }
 303
 304   if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
 305       cm->frame_type == KEY_FRAME) {
 306     fill_mode_costs(cpi);
 307
 308     if (!frame_is_intra_only(cm)) {
 309       vp9_build_nmv_cost_table(x->nmvjointcost,
 310                                cm->allow_high_precision_mv ? x->nmvcost_hp
 311                                                            : x->nmvcost,
 312                                &cm->fc.nmvc, cm->allow_high_precision_mv);
 313
 314       for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
 315         vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
 316                         cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
 317     }
 318   }
 319 }
 320
 321 static const int MAX_XSQ_Q10 = 245727;
 322
 323 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
 324   // NOTE: The tables below must be of the same size
 325
 326   // The functions described below are sampled at the four most significant
 327   // bits of x^2 + 8 / 256
 328
 329   // Normalized rate
 330   // This table models the rate for a Laplacian source
 331   // source with given variance when quantized with a uniform quantizer
 332   // with given stepsize. The closed form expression is:
 333   // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
 334   // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
 335   // and H(x) is the binary entropy function.
 336   static const int rate_tab_q10[] = {
 337     65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
 338      4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
 339      3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
 340      3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
 341      2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
 342      2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
 343      1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
 344      1159,  1086,  1021,   963,   911,   864,   821,   781,
 345       745,   680,   623,   574,   530,   490,   455,   424,
 346       395,   345,   304,   269,   239,   213,   190,   171,
 347       154,   126,   104,    87,    73,    61,    52,    44,
 348        38,    28,    21,    16,    12,    10,     8,     6,
 349         5,     3,     2,     1,     1,     1,     0,     0,
 350   };
 351   // Normalized distortion
 352   // This table models the normalized distortion for a Laplacian source
 353   // source with given variance when quantized with a uniform quantizer
 354   // with given stepsize. The closed form expression is:
 355   // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
 356   // where x = qpstep / sqrt(variance)
 357   // Note the actual distortion is Dn * variance.
 358   static const int dist_tab_q10[] = {
 359        0,     0,     1,     1,     1,     2,     2,     2,
 360        3,     3,     4,     5,     5,     6,     7,     7,
 361        8,     9,    11,    12,    13,    15,    16,    17,
 362       18,    21,    24,    26,    29,    31,    34,    36,
 363       39,    44,    49,    54,    59,    64,    69,    73,
 364       78,    88,    97,   106,   115,   124,   133,   142,
 365      151,   167,   184,   200,   215,   231,   245,   260,
 366      274,   301,   327,   351,   375,   397,   418,   439,
 367      458,   495,   528,   559,   587,   613,   637,   659,
 368      680,   717,   749,   777,   801,   823,   842,   859,
 369      874,   899,   919,   936,   949,   960,   969,   977,
 370      983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
 371     1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
 372   };
 373   static const int xsq_iq_q10[] = {
 374          0,      4,      8,     12,     16,     20,     24,     28,
 375         32,     40,     48,     56,     64,     72,     80,     88,
 376         96,    112,    128,    144,    160,    176,    192,    208,
 377        224,    256,    288,    320,    352,    384,    416,    448,
 378        480,    544,    608,    672,    736,    800,    864,    928,
 379        992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
 380       2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
 381       4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
 382       8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
 383      16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
 384      32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
 385      65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
 386     131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
 387   };
 388   /*
 389   static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
 390   assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
 391   assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
 392   assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
 393   */
 394   int tmp = (xsq_q10 >> 2) + 8;
 395   int k = get_msb(tmp) - 3;
 396   int xq = (k << 3) + ((tmp >> k) & 0x7);
 397   const int one_q10 = 1 << 10;
 398   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
 399   const int b_q10 = one_q10 - a_q10;
 400   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
 401   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
 402 }
 403
 404 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
 405                                   unsigned int qstep, int *rate,
 406                                   int64_t *dist) {
 407   // This function models the rate and distortion for a Laplacian
 408   // source with given variance when quantized with a uniform quantizer
 409   // with given stepsize. The closed form expressions are in:
 410   // Hang and Chen, "Source Model for transform video coder and its
 411   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
 412   // Sys. for Video Tech., April 1997.
 413   if (var == 0) {
 414     *rate = 0;
 415     *dist = 0;
 416   } else {
 417     int d_q10, r_q10;
 418     const uint64_t xsq_q10_64 =
 419         ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
 420     const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
 421                         MAX_XSQ_Q10 : (int)xsq_q10_64;
 422     model_rd_norm(xsq_q10, &r_q10, &d_q10);
 423     *rate = (n * r_q10 + 2) >> 2;
 424     *dist = (var * (int64_t)d_q10 + 512) >> 10;
 425   }
 426 }
 427
 428 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
 429                             MACROBLOCK *x, MACROBLOCKD *xd,
 430                             int *out_rate_sum, int64_t *out_dist_sum) {
 431   // Note our transform coeffs are 8 times an orthogonal transform.
 432   // Hence quantizer step is also 8 times. To get effective quantizer
 433   // we need to divide by 8 before sending to modeling function.
 434   int i;
 435   int64_t rate_sum = 0;
 436   int64_t dist_sum = 0;
 437   const int ref = xd->mi[0]->mbmi.ref_frame[0];
 438   unsigned int sse;
 439
 440   for (i = 0; i < MAX_MB_PLANE; ++i) {
 441     struct macroblock_plane *const p = &x->plane[i];
 442     struct macroblockd_plane *const pd = &xd->plane[i];
 443     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
 444
 445     (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
 446                               pd->dst.buf, pd->dst.stride, &sse);
 447
 448     if (i == 0)
 449       x->pred_sse[ref] = sse;
 450
 451     // Fast approximate the modelling function.
 452     if (cpi->oxcf.speed > 4) {
 453       int64_t rate;
 454       int64_t dist;
 455       int64_t square_error = sse;
 456       int quantizer = (pd->dequant[1] >> 3);
 457
 458       if (quantizer < 120)
 459         rate = (square_error * (280 - quantizer)) >> 8;
 460       else
 461         rate = 0;
 462       dist = (square_error * quantizer) >> 8;
 463       rate_sum += rate;
 464       dist_sum += dist;
 465     } else {
 466       int rate;
 467       int64_t dist;
 468       vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
 469                                    pd->dequant[1] >> 3, &rate, &dist);
 470       rate_sum += rate;
 471       dist_sum += dist;
 472     }
 473   }
 474
 475   *out_rate_sum = (int)rate_sum;
 476   *out_dist_sum = dist_sum << 4;
 477 }
 478
 479 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
 480                                  TX_SIZE tx_size,
 481                                  MACROBLOCK *x, MACROBLOCKD *xd,
 482                                  int *out_rate_sum, int64_t *out_dist_sum,
 483                                  int *out_skip) {
 484   int j, k;
 485   BLOCK_SIZE bs;
 486   const struct macroblock_plane *const p = &x->plane[0];
 487   const struct macroblockd_plane *const pd = &xd->plane[0];
 488   const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
 489   const int height = 4 * num_4x4_blocks_high_lookup[bsize];
 490   int rate_sum = 0;
 491   int64_t dist_sum = 0;
 492   const int t = 4 << tx_size;
 493
 494   if (tx_size == TX_4X4) {
 495     bs = BLOCK_4X4;
 496   } else if (tx_size == TX_8X8) {
 497     bs = BLOCK_8X8;
 498   } else if (tx_size == TX_16X16) {
 499     bs = BLOCK_16X16;
 500   } else if (tx_size == TX_32X32) {
 501     bs = BLOCK_32X32;
 502   } else {
 503     assert(0);
 504   }
 505
 506   *out_skip = 1;
 507   for (j = 0; j < height; j += t) {
 508     for (k = 0; k < width; k += t) {
 509       int rate;
 510       int64_t dist;
 511       unsigned int sse;
 512       cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
 513                          &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
 514                          &sse);
 515       // sse works better than var, since there is no dc prediction used
 516       vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
 517                                    &rate, &dist);
 518       rate_sum += rate;
 519       dist_sum += dist;
 520       *out_skip &= (rate < 1024);
 521     }
 522   }
 523
 524   *out_rate_sum = rate_sum;
 525   *out_dist_sum = dist_sum << 4;
 526 }
 527
 528 int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
 529                           intptr_t block_size, int64_t *ssz) {
 530   int i;
 531   int64_t error = 0, sqcoeff = 0;
 532
 533   for (i = 0; i < block_size; i++) {
 534     const int diff = coeff[i] - dqcoeff[i];
 535     error +=  diff * diff;
 536     sqcoeff += coeff[i] * coeff[i];
 537   }
 538
 539   *ssz = sqcoeff;
 540   return error;
 541 }
 542
 543 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
 544  * decide whether to include cost of a trailing EOB node or not (i.e. we
 545  * can skip this if the last coefficient in this transform block, e.g. the
 546  * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 547  * were non-zero). */
 548 static const int16_t band_counts[TX_SIZES][8] = {
 549   { 1, 2, 3, 4,  3,   16 - 13, 0 },
 550   { 1, 2, 3, 4, 11,   64 - 21, 0 },
 551   { 1, 2, 3, 4, 11,  256 - 21, 0 },
 552   { 1, 2, 3, 4, 11, 1024 - 21, 0 },
 553 };
 554 static INLINE int cost_coeffs(MACROBLOCK *x,
 555                               int plane, int block,
 556                               ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
 557                               TX_SIZE tx_size,
 558                               const int16_t *scan, const int16_t *nb,
 559                               int use_fast_coef_costing) {
 560   MACROBLOCKD *const xd = &x->e_mbd;
 561   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 562   const struct macroblock_plane *p = &x->plane[plane];
 563   const struct macroblockd_plane *pd = &xd->plane[plane];
 564   const PLANE_TYPE type = pd->plane_type;
 565   const int16_t *band_count = &band_counts[tx_size][1];
 566   const int eob = p->eobs[block];
 567   const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
 568   unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
 569                    x->token_costs[tx_size][type][is_inter_block(mbmi)];
 570   uint8_t token_cache[32 * 32];
 571   int pt = combine_entropy_contexts(*A, *L);
 572   int c, cost;
 573   // Check for consistency of tx_size with mode info
 574   assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
 575                               : get_uv_tx_size(mbmi) == tx_size);
 576
 577   if (eob == 0) {
 578     // single eob token
 579     cost = token_costs[0][0][pt][EOB_TOKEN];
 580     c = 0;
 581   } else {
 582     int band_left = *band_count++;
 583
 584     // dc token
 585     int v = qcoeff[0];
 586     int prev_t = vp9_dct_value_tokens_ptr[v].token;
 587     cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
 588     token_cache[0] = vp9_pt_energy_class[prev_t];
 589     ++token_costs;
 590
 591     // ac tokens
 592     for (c = 1; c < eob; c++) {
 593       const int rc = scan[c];
 594       int t;
 595
 596       v = qcoeff[rc];
 597       t = vp9_dct_value_tokens_ptr[v].token;
 598       if (use_fast_coef_costing) {
 599         cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
 600       } else {
 601         pt = get_coef_context(nb, token_cache, c);
 602         cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
 603         token_cache[rc] = vp9_pt_energy_class[t];
 604       }
 605       prev_t = t;
 606       if (!--band_left) {
 607         band_left = *band_count++;
 608         ++token_costs;
 609       }
 610     }
 611
 612     // eob token
 613     if (band_left) {
 614       if (use_fast_coef_costing) {
 615         cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
 616       } else {
 617         pt = get_coef_context(nb, token_cache, c);
 618         cost += (*token_costs)[0][pt][EOB_TOKEN];
 619       }
 620     }
 621   }
 622
 623   // is eob first coefficient;
 624   *A = *L = (c > 0);
 625
 626   return cost;
 627 }
 628 static void dist_block(int plane, int block, TX_SIZE tx_size,
 629                        struct rdcost_block_args* args) {
 630   const int ss_txfrm_size = tx_size << 1;
 631   MACROBLOCK* const x = args->x;
 632   MACROBLOCKD* const xd = &x->e_mbd;
 633   const struct macroblock_plane *const p = &x->plane[plane];
 634   const struct macroblockd_plane *const pd = &xd->plane[plane];
 635   int64_t this_sse;
 636   int shift = tx_size == TX_32X32 ? 0 : 2;
 637   int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 638   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 639   args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
 640                                &this_sse) >> shift;
 641   args->sse  = this_sse >> shift;
 642
 643   if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
 644     // TODO(jingning): tune the model to better capture the distortion.
 645     int64_t p = (pd->dequant[1] * pd->dequant[1] *
 646                     (1 << ss_txfrm_size)) >> (shift + 2);
 647     args->dist += (p >> 4);
 648     args->sse  += p;
 649   }
 650 }
 651
 652 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
 653                        TX_SIZE tx_size, struct rdcost_block_args* args) {
 654   int x_idx, y_idx;
 655   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
 656
 657   args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
 658                            args->t_left + y_idx, tx_size,
 659                            args->so->scan, args->so->neighbors,
 660                            args->use_fast_coef_costing);
 661 }
 662
 663 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
 664                           TX_SIZE tx_size, void *arg) {
 665   struct rdcost_block_args *args = arg;
 666   MACROBLOCK *const x = args->x;
 667   MACROBLOCKD *const xd = &x->e_mbd;
 668   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 669   int64_t rd1, rd2, rd;
 670
 671   if (args->skip)
 672     return;
 673
 674   if (!is_inter_block(mbmi))
 675     vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
 676   else
 677     vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 678
 679   dist_block(plane, block, tx_size, args);
 680   rate_block(plane, block, plane_bsize, tx_size, args);
 681   rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
 682   rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
 683
 684   // TODO(jingning): temporarily enabled only for luma component
 685   rd = MIN(rd1, rd2);
 686   if (plane == 0)
 687     x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
 688                                     (rd1 > rd2 && !xd->lossless);
 689
 690   args->this_rate += args->rate;
 691   args->this_dist += args->dist;
 692   args->this_sse  += args->sse;
 693   args->this_rd += rd;
 694
 695   if (args->this_rd > args->best_rd) {
 696     args->skip = 1;
 697     return;
 698   }
 699 }
 700
 701 void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
 702                               const struct macroblockd_plane *pd,
 703                               ENTROPY_CONTEXT t_above[16],
 704                               ENTROPY_CONTEXT t_left[16]) {
 705   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
 706   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
 707   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
 708   const ENTROPY_CONTEXT *const above = pd->above_context;
 709   const ENTROPY_CONTEXT *const left = pd->left_context;
 710
 711   int i;
 712   switch (tx_size) {
 713     case TX_4X4:
 714       vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
 715       vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
 716       break;
 717     case TX_8X8:
 718       for (i = 0; i < num_4x4_w; i += 2)
 719         t_above[i] = !!*(const uint16_t *)&above[i];
 720       for (i = 0; i < num_4x4_h; i += 2)
 721         t_left[i] = !!*(const uint16_t *)&left[i];
 722       break;
 723     case TX_16X16:
 724       for (i = 0; i < num_4x4_w; i += 4)
 725         t_above[i] = !!*(const uint32_t *)&above[i];
 726       for (i = 0; i < num_4x4_h; i += 4)
 727         t_left[i] = !!*(const uint32_t *)&left[i];
 728       break;
 729     case TX_32X32:
 730       for (i = 0; i < num_4x4_w; i += 8)
 731         t_above[i] = !!*(const uint64_t *)&above[i];
 732       for (i = 0; i < num_4x4_h; i += 8)
 733         t_left[i] = !!*(const uint64_t *)&left[i];
 734       break;
 735     default:
 736       assert(0 && "Invalid transform size.");
 737   }
 738 }
 739
 740 static void txfm_rd_in_plane(MACROBLOCK *x,
 741                              int *rate, int64_t *distortion,
 742                              int *skippable, int64_t *sse,
 743                              int64_t ref_best_rd, int plane,
 744                              BLOCK_SIZE bsize, TX_SIZE tx_size,
 745                              int use_fast_coef_casting) {
 746   MACROBLOCKD *const xd = &x->e_mbd;
 747   const struct macroblockd_plane *const pd = &xd->plane[plane];
 748   struct rdcost_block_args args = { 0 };
 749   args.x = x;
 750   args.best_rd = ref_best_rd;
 751   args.use_fast_coef_costing = use_fast_coef_casting;
 752
 753   if (plane == 0)
 754     xd->mi[0]->mbmi.tx_size = tx_size;
 755
 756   vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
 757
 758   args.so = get_scan(xd, tx_size, pd->plane_type, 0);
 759
 760   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
 761                                          block_rd_txfm, &args);
 762   if (args.skip) {
 763     *rate       = INT_MAX;
 764     *distortion = INT64_MAX;
 765     *sse        = INT64_MAX;
 766     *skippable  = 0;
 767   } else {
 768     *distortion = args.this_dist;
 769     *rate       = args.this_rate;
 770     *sse        = args.this_sse;
 771     *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
 772   }
 773 }
 774
 775 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
 776                                      int *rate, int64_t *distortion,
 777                                      int *skip, int64_t *sse,
 778                                      int64_t ref_best_rd,
 779                                      BLOCK_SIZE bs) {
 780   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 781   VP9_COMMON *const cm = &cpi->common;
 782   const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
 783   MACROBLOCKD *const xd = &x->e_mbd;
 784   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 785
 786   mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
 787
 788   txfm_rd_in_plane(x, rate, distortion, skip,
 789                    &sse[mbmi->tx_size], ref_best_rd, 0, bs,
 790                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 791   cpi->tx_stepdown_count[0]++;
 792 }
 793
 794 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
 795                                      int (*r)[2], int *rate,
 796                                      int64_t *d, int64_t *distortion,
 797                                      int *s, int *skip,
 798                                      int64_t tx_cache[TX_MODES],
 799                                      BLOCK_SIZE bs) {
 800   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 801   VP9_COMMON *const cm = &cpi->common;
 802   MACROBLOCKD *const xd = &x->e_mbd;
 803   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 804   vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
 805   int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
 806                              {INT64_MAX, INT64_MAX},
 807                              {INT64_MAX, INT64_MAX},
 808                              {INT64_MAX, INT64_MAX}};
 809   int n, m;
 810   int s0, s1;
 811   const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
 812   int64_t best_rd = INT64_MAX;
 813   TX_SIZE best_tx = TX_4X4;
 814
 815   const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
 816   assert(skip_prob > 0);
 817   s0 = vp9_cost_bit(skip_prob, 0);
 818   s1 = vp9_cost_bit(skip_prob, 1);
 819
 820   for (n = TX_4X4; n <= max_tx_size; n++) {
 821     r[n][1] = r[n][0];
 822     if (r[n][0] < INT_MAX) {
 823       for (m = 0; m <= n - (n == max_tx_size); m++) {
 824         if (m == n)
 825           r[n][1] += vp9_cost_zero(tx_probs[m]);
 826         else
 827           r[n][1] += vp9_cost_one(tx_probs[m]);
 828       }
 829     }
 830     if (d[n] == INT64_MAX) {
 831       rd[n][0] = rd[n][1] = INT64_MAX;
 832     } else if (s[n]) {
 833       rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
 834     } else {
 835       rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
 836       rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
 837     }
 838
 839     if (rd[n][1] < best_rd) {
 840       best_tx = n;
 841       best_rd = rd[n][1];
 842     }
 843   }
 844   mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
 845                       best_tx : MIN(max_tx_size, max_mode_tx_size);
 846
 847
 848   *distortion = d[mbmi->tx_size];
 849   *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
 850   *skip       = s[mbmi->tx_size];
 851
 852   tx_cache[ONLY_4X4] = rd[TX_4X4][0];
 853   tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
 854   tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
 855   tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
 856
 857   if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
 858     tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
 859     cpi->tx_stepdown_count[0]++;
 860   } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
 861     tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
 862     cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
 863   } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
 864     tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
 865     cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
 866   } else {
 867     tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
 868     cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
 869   }
 870 }
 871
 872 static int64_t scaled_rd_cost(int rdmult, int rddiv,
 873                               int rate, int64_t dist, double scale) {
 874   return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
 875 }
 876
 877 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
 878                                           int (*r)[2], int *rate,
 879                                           int64_t *d, int64_t *distortion,
 880                                           int *s, int *skip, int64_t *sse,
 881                                           int64_t ref_best_rd,
 882                                           BLOCK_SIZE bs) {
 883   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 884   VP9_COMMON *const cm = &cpi->common;
 885   MACROBLOCKD *const xd = &x->e_mbd;
 886   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 887   vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
 888   int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
 889                              {INT64_MAX, INT64_MAX},
 890                              {INT64_MAX, INT64_MAX},
 891                              {INT64_MAX, INT64_MAX}};
 892   int n, m;
 893   int s0, s1;
 894   double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
 895   const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
 896   int64_t best_rd = INT64_MAX;
 897   TX_SIZE best_tx = TX_4X4;
 898
 899   const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
 900   assert(skip_prob > 0);
 901   s0 = vp9_cost_bit(skip_prob, 0);
 902   s1 = vp9_cost_bit(skip_prob, 1);
 903
 904   for (n = TX_4X4; n <= max_tx_size; n++) {
 905     double scale = scale_rd[n];
 906     r[n][1] = r[n][0];
 907     for (m = 0; m <= n - (n == max_tx_size); m++) {
 908       if (m == n)
 909         r[n][1] += vp9_cost_zero(tx_probs[m]);
 910       else
 911         r[n][1] += vp9_cost_one(tx_probs[m]);
 912     }
 913     if (s[n]) {
 914       rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
 915                                            scale);
 916     } else {
 917       rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
 918                                 scale);
 919       rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
 920                                 scale);
 921     }
 922     if (rd[n][1] < best_rd) {
 923       best_rd = rd[n][1];
 924       best_tx = n;
 925     }
 926   }
 927
 928   mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
 929                       best_tx : MIN(max_tx_size, max_mode_tx_size);
 930
 931   // Actually encode using the chosen mode if a model was used, but do not
 932   // update the r, d costs
 933   txfm_rd_in_plane(x, rate, distortion, skip,
 934                    &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
 935                    cpi->sf.use_fast_coef_costing);
 936
 937   if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
 938     cpi->tx_stepdown_count[0]++;
 939   } else if (max_tx_size >= TX_16X16 &&  best_tx == TX_16X16) {
 940     cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
 941   } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
 942     cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
 943   } else {
 944     cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
 945   }
 946 }
 947
 948 static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 949                                   int64_t *distortion, int *skip,
 950                                   int64_t *psse, BLOCK_SIZE bs,
 951                                   int64_t txfm_cache[TX_MODES],
 952                                   int64_t ref_best_rd) {
 953   int r[TX_SIZES][2], s[TX_SIZES];
 954   int64_t d[TX_SIZES], sse[TX_SIZES];
 955   MACROBLOCKD *xd = &x->e_mbd;
 956   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 957   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 958   TX_SIZE tx_size;
 959
 960   assert(bs == mbmi->sb_type);
 961
 962   vp9_subtract_plane(x, bs, 0);
 963
 964   if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
 965     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
 966     choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
 967                              ref_best_rd, bs);
 968     if (psse)
 969       *psse = sse[mbmi->tx_size];
 970     return;
 971   }
 972
 973   if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
 974     for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
 975       model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
 976                            &r[tx_size][0], &d[tx_size], &s[tx_size]);
 977     choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
 978                                   skip, sse, ref_best_rd, bs);
 979   } else {
 980     for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
 981       txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
 982                        &s[tx_size], &sse[tx_size],
 983                        ref_best_rd, 0, bs, tx_size,
 984                        cpi->sf.use_fast_coef_costing);
 985     choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
 986                              skip, txfm_cache, bs);
 987   }
 988   if (psse)
 989     *psse = sse[mbmi->tx_size];
 990 }
 991
 992 static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 993                                   int64_t *distortion, int *skip,
 994                                   int64_t *psse, BLOCK_SIZE bs,
 995                                   int64_t txfm_cache[TX_MODES],
 996                                   int64_t ref_best_rd) {
 997   int64_t sse[TX_SIZES];
 998   MACROBLOCKD *xd = &x->e_mbd;
 999   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1000
1001   assert(bs == mbmi->sb_type);
1002   if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
1003     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
1004     choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
1005                              ref_best_rd, bs);
1006   } else {
1007     int r[TX_SIZES][2], s[TX_SIZES];
1008     int64_t d[TX_SIZES];
1009     TX_SIZE tx_size;
1010     for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
1011       txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
1012                        &s[tx_size], &sse[tx_size],
1013                        ref_best_rd, 0, bs, tx_size,
1014                        cpi->sf.use_fast_coef_costing);
1015     choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
1016                              skip, txfm_cache, bs);
1017   }
1018   if (psse)
1019     *psse = sse[mbmi->tx_size];
1020 }
1021
1022
1023 static int conditional_skipintra(PREDICTION_MODE mode,
1024                                  PREDICTION_MODE best_intra_mode) {
1025   if (mode == D117_PRED &&
1026       best_intra_mode != V_PRED &&
1027       best_intra_mode != D135_PRED)
1028     return 1;
1029   if (mode == D63_PRED &&
1030       best_intra_mode != V_PRED &&
1031       best_intra_mode != D45_PRED)
1032     return 1;
1033   if (mode == D207_PRED &&
1034       best_intra_mode != H_PRED &&
1035       best_intra_mode != D45_PRED)
1036     return 1;
1037   if (mode == D153_PRED &&
1038       best_intra_mode != H_PRED &&
1039       best_intra_mode != D135_PRED)
1040     return 1;
1041   return 0;
1042 }
1043
// Searches the intra prediction modes DC_PRED..TM_PRED for the sub-block
// whose first 4x4 unit has raster index ib inside the current 8x8 block.
//
//   best_mode       in/out: best mode so far; also consulted by the
//                           direction-mismatch skip test.
//   bmode_costs     in:     rate of signalling each mode.
//   a, l            in/out: above/left entropy contexts; overwritten with
//                           the contexts of the winning mode only.
//   bestrate, bestratey, bestdistortion
//                   out:    written only when some mode beats rd_thresh.
//   bsize           in:     sub-block size; determines how many 4x4 units
//                           are predicted and coded per mode.
//   rd_thresh       in:     cost to beat.
//
// Returns the best rd cost found, or rd_thresh unchanged when no mode was
// cheaper.  Sets the current mode info's tx_size to TX_4X4.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;

  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  // Holds the reconstructed pixels of the best mode so far (row stride 8).
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  // Snapshot the incoming contexts; a and l are only written back when a
  // mode is accepted as the new best.
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    // Honour the speed feature's per-transform-size mode mask.
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
          continue;
    }

    // Every candidate mode starts from the same saved contexts.
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = ib + idy * 2 + idx;
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
        int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        // With skip_encode set the predictor reads its reference pixels
        // from the source buffer instead of the destination buffer.
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, idx, idy, 0);
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          // Lossless path: no distortion is accumulated, only rate.
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          // Abandon this mode once its partial cost cannot beat the best.
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                             so->scan, so->neighbors,
                             cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          // Abandon this mode once its partial cost cannot beat the best.
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          // Reconstruct into dst so later 4x4 units of this mode can
          // predict from it.
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    // Full cost of the mode: coefficient rate plus mode signalling rate.
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      // Keep this mode's reconstruction; later candidates overwrite dst.
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  // Nothing beat the threshold, or encoding is skipped: leave dst as is.
  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  // Put the winning mode's reconstruction back into the destination buffer.
  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}
1170
1171 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
1172                                             int *rate, int *rate_y,
1173                                             int64_t *distortion,
1174                                             int64_t best_rd) {
1175   int i, j;
1176   const MACROBLOCKD *const xd = &mb->e_mbd;
1177   MODE_INFO *const mic = xd->mi[0];
1178   const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1179   const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1180   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1181   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1182   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1183   int idx, idy;
1184   int cost = 0;
1185   int64_t total_distortion = 0;
1186   int tot_rate_y = 0;
1187   int64_t total_rd = 0;
1188   ENTROPY_CONTEXT t_above[4], t_left[4];
1189   const int *bmode_costs = cpi->mbmode_cost;
1190
1191   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1192   vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1193
1194   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1195   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1196     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1197       PREDICTION_MODE best_mode = DC_PRED;
1198       int r = INT_MAX, ry = INT_MAX;
1199       int64_t d = INT64_MAX, this_rd = INT64_MAX;
1200       i = idy * 2 + idx;
1201       if (cpi->common.frame_type == KEY_FRAME) {
1202         const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
1203         const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
1204
1205         bmode_costs  = cpi->y_mode_costs[A][L];
1206       }
1207
1208       this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1209                                       t_above + idx, t_left + idy, &r, &ry, &d,
1210                                       bsize, best_rd - total_rd);
1211       if (this_rd >= best_rd - total_rd)
1212         return INT64_MAX;
1213
1214       total_rd += this_rd;
1215       cost += r;
1216       total_distortion += d;
1217       tot_rate_y += ry;
1218
1219       mic->bmi[i].as_mode = best_mode;
1220       for (j = 1; j < num_4x4_blocks_high; ++j)
1221         mic->bmi[i + j * 2].as_mode = best_mode;
1222       for (j = 1; j < num_4x4_blocks_wide; ++j)
1223         mic->bmi[i + j].as_mode = best_mode;
1224
1225       if (total_rd >= best_rd)
1226         return INT64_MAX;
1227     }
1228   }
1229
1230   *rate = cost;
1231   *rate_y = tot_rate_y;
1232   *distortion = total_distortion;
1233   mic->mbmi.mode = mic->bmi[3].as_mode;
1234
1235   return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1236 }
1237
1238 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1239                                       int *rate, int *rate_tokenonly,
1240                                       int64_t *distortion, int *skippable,
1241                                       BLOCK_SIZE bsize,
1242                                       int64_t tx_cache[TX_MODES],
1243                                       int64_t best_rd) {
1244   PREDICTION_MODE mode;
1245   PREDICTION_MODE mode_selected = DC_PRED;
1246   MACROBLOCKD *const xd = &x->e_mbd;
1247   MODE_INFO *const mic = xd->mi[0];
1248   int this_rate, this_rate_tokenonly, s;
1249   int64_t this_distortion, this_rd;
1250   TX_SIZE best_tx = TX_4X4;
1251   int i;
1252   int *bmode_costs = cpi->mbmode_cost;
1253
1254   if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1255     for (i = 0; i < TX_MODES; i++)
1256       tx_cache[i] = INT64_MAX;
1257
1258   /* Y Search for intra prediction mode */
1259   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1260     int64_t local_tx_cache[TX_MODES];
1261     MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1262     MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1263
1264     if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
1265       continue;
1266
1267     if (cpi->common.frame_type == KEY_FRAME) {
1268       const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1269       const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1270
1271       bmode_costs = cpi->y_mode_costs[A][L];
1272     }
1273     mic->mbmi.mode = mode;
1274
1275     intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
1276         &s, NULL, bsize, local_tx_cache, best_rd);
1277
1278     if (this_rate_tokenonly == INT_MAX)
1279       continue;
1280
1281     this_rate = this_rate_tokenonly + bmode_costs[mode];
1282     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1283
1284     if (this_rd < best_rd) {
1285       mode_selected   = mode;
1286       best_rd         = this_rd;
1287       best_tx         = mic->mbmi.tx_size;
1288       *rate           = this_rate;
1289       *rate_tokenonly = this_rate_tokenonly;
1290       *distortion     = this_distortion;
1291       *skippable      = s;
1292     }
1293
1294     if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1295       for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1296         const int64_t adj_rd = this_rd + local_tx_cache[i] -
1297             local_tx_cache[cpi->common.tx_mode];
1298         if (adj_rd < tx_cache[i]) {
1299           tx_cache[i] = adj_rd;
1300         }
1301       }
1302     }
1303   }
1304
1305   mic->mbmi.mode = mode_selected;
1306   mic->mbmi.tx_size = best_tx;
1307
1308   return best_rd;
1309 }
1310
1311 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
1312                              int *rate, int64_t *distortion, int *skippable,
1313                              int64_t *sse, BLOCK_SIZE bsize,
1314                              int64_t ref_best_rd) {
1315   MACROBLOCKD *const xd = &x->e_mbd;
1316   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1317   TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1318   int plane;
1319   int pnrate = 0, pnskip = 1;
1320   int64_t pndist = 0, pnsse = 0;
1321
1322   if (ref_best_rd < 0)
1323     goto term;
1324
1325   if (is_inter_block(mbmi)) {
1326     int plane;
1327     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
1328       vp9_subtract_plane(x, bsize, plane);
1329   }
1330
1331   *rate = 0;
1332   *distortion = 0;
1333   *sse = 0;
1334   *skippable = 1;
1335
1336   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1337     txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
1338                      ref_best_rd, plane, bsize, uv_txfm_size,
1339                      cpi->sf.use_fast_coef_costing);
1340     if (pnrate == INT_MAX)
1341       goto term;
1342     *rate += pnrate;
1343     *distortion += pndist;
1344     *sse += pnsse;
1345     *skippable &= pnskip;
1346   }
1347   return;
1348
1349   term:
1350   *rate = INT_MAX;
1351   *distortion = INT64_MAX;
1352   *sse = INT64_MAX;
1353   *skippable = 0;
1354   return;
1355 }
1356
1357 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1358                                        PICK_MODE_CONTEXT *ctx,
1359                                        int *rate, int *rate_tokenonly,
1360                                        int64_t *distortion, int *skippable,
1361                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
1362   MACROBLOCKD *xd = &x->e_mbd;
1363   PREDICTION_MODE mode;
1364   PREDICTION_MODE mode_selected = DC_PRED;
1365   int64_t best_rd = INT64_MAX, this_rd;
1366   int this_rate_tokenonly, this_rate, s;
1367   int64_t this_distortion, this_sse;
1368
1369   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1370     if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
1371       continue;
1372
1373     xd->mi[0]->mbmi.uv_mode = mode;
1374
1375     super_block_uvrd(cpi, x, &this_rate_tokenonly,
1376                      &this_distortion, &s, &this_sse, bsize, best_rd);
1377     if (this_rate_tokenonly == INT_MAX)
1378       continue;
1379     this_rate = this_rate_tokenonly +
1380                 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
1381     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1382
1383     if (this_rd < best_rd) {
1384       mode_selected   = mode;
1385       best_rd         = this_rd;
1386       *rate           = this_rate;
1387       *rate_tokenonly = this_rate_tokenonly;
1388       *distortion     = this_distortion;
1389       *skippable      = s;
1390       if (!x->select_txfm_size) {
1391         int i;
1392         struct macroblock_plane *const p = x->plane;
1393         struct macroblockd_plane *const pd = xd->plane;
1394         for (i = 1; i < MAX_MB_PLANE; ++i) {
1395           p[i].coeff    = ctx->coeff_pbuf[i][2];
1396           p[i].qcoeff   = ctx->qcoeff_pbuf[i][2];
1397           pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1398           p[i].eobs    = ctx->eobs_pbuf[i][2];
1399
1400           ctx->coeff_pbuf[i][2]   = ctx->coeff_pbuf[i][0];
1401           ctx->qcoeff_pbuf[i][2]  = ctx->qcoeff_pbuf[i][0];
1402           ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
1403           ctx->eobs_pbuf[i][2]    = ctx->eobs_pbuf[i][0];
1404
1405           ctx->coeff_pbuf[i][0]   = p[i].coeff;
1406           ctx->qcoeff_pbuf[i][0]  = p[i].qcoeff;
1407           ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
1408           ctx->eobs_pbuf[i][0]    = p[i].eobs;
1409         }
1410       }
1411     }
1412   }
1413
1414   xd->mi[0]->mbmi.uv_mode = mode_selected;
1415   return best_rd;
1416 }
1417
1418 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
1419                               int *rate, int *rate_tokenonly,
1420                               int64_t *distortion, int *skippable,
1421                               BLOCK_SIZE bsize) {
1422   const VP9_COMMON *cm = &cpi->common;
1423   int64_t unused;
1424
1425   x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
1426   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
1427                    skippable, &unused, bsize, INT64_MAX);
1428   *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
1429   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1430 }
1431
1432 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1433                                  BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1434                                  int *rate_uv, int *rate_uv_tokenonly,
1435                                  int64_t *dist_uv, int *skip_uv,
1436                                  PREDICTION_MODE *mode_uv) {
1437   MACROBLOCK *const x = &cpi->mb;
1438
1439   // Use an estimated rd for uv_intra based on DC_PRED if the
1440   // appropriate speed flag is set.
1441   if (cpi->sf.use_uv_intra_rd_estimate) {
1442     rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
1443                    skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1444   // Else do a proper rd search for each possible transform size that may
1445   // be considered in the main rd loop.
1446   } else {
1447     rd_pick_intra_sbuv_mode(cpi, x, ctx,
1448                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1449                             bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
1450   }
1451   *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
1452 }
1453
1454 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1455                        int mode_context) {
1456   const MACROBLOCK *const x = &cpi->mb;
1457   const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id;
1458
1459   // Don't account for mode here if segment skip is enabled.
1460   if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1461     assert(is_inter_mode(mode));
1462     return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1463   } else {
1464     return 0;
1465   }
1466 }
1467
// Forward declaration of a file-local function; its definition is not part
// of this section of the file.
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);
1474
1475 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
1476                                 PREDICTION_MODE mode, int_mv this_mv[2],
1477                                 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1478                                 int_mv seg_mvs[MAX_REF_FRAMES],
1479                                 int_mv *best_ref_mv[2], const int *mvjcost,
1480                                 int *mvcost[2]) {
1481   MODE_INFO *const mic = xd->mi[0];
1482   const MB_MODE_INFO *const mbmi = &mic->mbmi;
1483   int thismvcost = 0;
1484   int idx, idy;
1485   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1486   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1487   const int is_compound = has_second_ref(mbmi);
1488
1489   switch (mode) {
1490     case NEWMV:
1491       this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1492       thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1493                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1494       if (is_compound) {
1495         this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1496         thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1497                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1498       }
1499       break;
1500     case NEARMV:
1501     case NEARESTMV:
1502       this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
1503       if (is_compound)
1504         this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
1505       break;
1506     case ZEROMV:
1507       this_mv[0].as_int = 0;
1508       if (is_compound)
1509         this_mv[1].as_int = 0;
1510       break;
1511     default:
1512       break;
1513   }
1514
1515   mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1516   if (is_compound)
1517     mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1518
1519   mic->bmi[i].as_mode = mode;
1520
1521   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1522     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1523       vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1524                  &mic->bmi[i], sizeof(mic->bmi[i]));
1525
1526   return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
1527             thismvcost;
1528 }
1529
1530 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1531                                        MACROBLOCK *x,
1532                                        int64_t best_yrd,
1533                                        int i,
1534                                        int *labelyrate,
1535                                        int64_t *distortion, int64_t *sse,
1536                                        ENTROPY_CONTEXT *ta,
1537                                        ENTROPY_CONTEXT *tl,
1538                                        int mi_row, int mi_col) {
1539   int k;
1540   MACROBLOCKD *xd = &x->e_mbd;
1541   struct macroblockd_plane *const pd = &xd->plane[0];
1542   struct macroblock_plane *const p = &x->plane[0];
1543   MODE_INFO *const mi = xd->mi[0];
1544   const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1545   const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1546   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1547   int idx, idy;
1548
1549   const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1550                                                              p->src.stride)];
1551   uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1552                                                         pd->dst.stride)];
1553   int64_t thisdistortion = 0, thissse = 0;
1554   int thisrate = 0, ref;
1555   const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1556   const int is_compound = has_second_ref(&mi->mbmi);
1557   const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
1558
1559   for (ref = 0; ref < 1 + is_compound; ++ref) {
1560     const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1561                                                pd->pre[ref].stride)];
1562     vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1563                               dst, pd->dst.stride,
1564                               &mi->bmi[i].as_mv[ref].as_mv,
1565                               &xd->block_refs[ref]->sf, width, height, ref,
1566                               kernel, MV_PRECISION_Q3,
1567                               mi_col * MI_SIZE + 4 * (i % 2),
1568                               mi_row * MI_SIZE + 4 * (i / 2));
1569   }
1570
1571   vp9_subtract_block(height, width,
1572                      raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1573                      src, p->src.stride,
1574                      dst, pd->dst.stride);
1575
1576   k = i;
1577   for (idy = 0; idy < height / 4; ++idy) {
1578     for (idx = 0; idx < width / 4; ++idx) {
1579       int64_t ssz, rd, rd1, rd2;
1580       int16_t* coeff;
1581
1582       k += (idy * 2 + idx);
1583       coeff = BLOCK_OFFSET(p->coeff, k);
1584       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1585                     coeff, 8);
1586       vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1587       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1588                                         16, &ssz);
1589       thissse += ssz;
1590       thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1591                               so->scan, so->neighbors,
1592                               cpi->sf.use_fast_coef_costing);
1593       rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1594       rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1595       rd = MIN(rd1, rd2);
1596       if (rd >= best_yrd)
1597         return INT64_MAX;
1598     }
1599   }
1600
1601   *distortion = thisdistortion >> 2;
1602   *labelyrate = thisrate;
1603   *sse = thissse >> 2;
1604
1605   return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1606 }
1607
// Rate-distortion statistics recorded for one (sub-8x8 label, inter mode)
// pair while rd_pick_best_sub8x8_mode() searches.
1608 typedef struct {
1609   int eobs;                // copied from the plane's eobs[] after encoding
1610   int brate;               // mode + mv rate; byrate is added in afterwards
1611   int byrate;              // coefficient rate from encode_inter_mb_segment()
1612   int64_t bdist;           // distortion from encode_inter_mb_segment()
1613   int64_t bsse;            // sse from encode_inter_mb_segment()
1614   int64_t brdcost;         // RD cost; INT64_MAX marks "not usable"
1615   int_mv mvs[2];           // motion vector per reference
1616   ENTROPY_CONTEXT ta[2];   // working copy of the above entropy context
1617   ENTROPY_CONTEXT tl[2];   // working copy of the left entropy context
1618 } SEG_RDSTAT;
1619
// State and results of one rd_pick_best_sub8x8_mode() run.
1620 typedef struct {
1621   int_mv *ref_mv[2];       // reference mvs supplied by the caller
1622   int_mv mvp;              // predictor used to seed the NEWMV search
1623 
1624   int64_t segment_rd;      // RD budget on entry, total RD cost on success,
                                // INT64_MAX when the search was abandoned
1625   int r;                   // accumulated rate over all labels
1626   int64_t d;               // accumulated distortion over all labels
1627   int64_t sse;             // accumulated sse over all labels
1628   int segment_yrate;       // accumulated coefficient rate over all labels
1629   PREDICTION_MODE modes[4];             // mode chosen for each 4x4 block
1630   SEG_RDSTAT rdstat[4][INTER_MODES];    // stats per 4x4 block and inter mode
1631   int mvthresh;            // caller threshold used to skip NEWMV searches
1632 } BEST_SEG_INFO;
1633
1634 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
1635   return (mv->row >> 3) < x->mv_row_min ||
1636          (mv->row >> 3) > x->mv_row_max ||
1637          (mv->col >> 3) < x->mv_col_min ||
1638          (mv->col >> 3) > x->mv_col_max;
1639 }
1640
1641 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1642   MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
1643   struct macroblock_plane *const p = &x->plane[0];
1644   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1645
1646   p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1647   assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1648   pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1649                                                        pd->pre[0].stride)];
1650   if (has_second_ref(mbmi))
1651     pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1652                                                          pd->pre[1].stride)];
1653 }
1654
1655 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1656                                   struct buf_2d orig_pre[2]) {
1657   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
1658   x->plane[0].src = orig_src;
1659   x->e_mbd.plane[0].pre[0] = orig_pre[0];
1660   if (has_second_ref(mbmi))
1661     x->e_mbd.plane[0].pre[1] = orig_pre[1];
1662 }
1663
1664 static INLINE int mv_has_subpel(const MV *mv) {
1665   return (mv->row & 0x0F) || (mv->col & 0x0F);
1666 }
1667
1668 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
1669 // TODO(aconverse): Find out if this is still productive then clean up or remove
1670 static int check_best_zero_mv(
1671     const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
1672     int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1673     int disable_inter_mode_mask, int this_mode,
1674     const MV_REFERENCE_FRAME ref_frames[2]) {
1675   if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
1676       (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1677       frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1678       (ref_frames[1] == NONE ||
1679        frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1680     int rfc = mode_context[ref_frames[0]];
1681     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1682     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1683     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1684
1685     if (this_mode == NEARMV) {
1686       if (c1 > c3) return 0;
1687     } else if (this_mode == NEARESTMV) {
1688       if (c2 > c3) return 0;
1689     } else {
1690       assert(this_mode == ZEROMV);
1691       if (ref_frames[1] == NONE) {
1692         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1693             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1694           return 0;
1695       } else {
1696         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1697              frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1698             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1699              frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1700           return 0;
1701       }
1702     }
1703   }
1704   return 1;
1705 }
1706
1707 static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
1708                                         const TileInfo * const tile,
1709                                         int_mv *best_ref_mv,
1710                                         int_mv *second_best_ref_mv,
1711                                         int64_t best_rd, int *returntotrate,
1712                                         int *returnyrate,
1713                                         int64_t *returndistortion,
1714                                         int *skippable, int64_t *psse,
1715                                         int mvthresh,
1716                                         int_mv seg_mvs[4][MAX_REF_FRAMES],
1717                                         BEST_SEG_INFO *bsi_buf, int filter_idx,
1718                                         int mi_row, int mi_col) {
1719   int i;
1720   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1721   MACROBLOCKD *xd = &x->e_mbd;
1722   MODE_INFO *mi = xd->mi[0];
1723   MB_MODE_INFO *mbmi = &mi->mbmi;
1724   int mode_idx;
1725   int k, br = 0, idx, idy;
1726   int64_t bd = 0, block_sse = 0;
1727   PREDICTION_MODE this_mode;
1728   VP9_COMMON *cm = &cpi->common;
1729   struct macroblock_plane *const p = &x->plane[0];
1730   struct macroblockd_plane *const pd = &xd->plane[0];
1731   const int label_count = 4;
1732   int64_t this_segment_rd = 0;
1733   int label_mv_thresh;
1734   int segmentyrate = 0;
1735   const BLOCK_SIZE bsize = mbmi->sb_type;
1736   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1737   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1738   vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
1739   ENTROPY_CONTEXT t_above[2], t_left[2];
1740   int subpelmv = 1, have_ref = 0;
1741   const int has_second_rf = has_second_ref(mbmi);
1742   const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
1743
1744   vp9_zero(*bsi);
1745
1746   bsi->segment_rd = best_rd;
1747   bsi->ref_mv[0] = best_ref_mv;
1748   bsi->ref_mv[1] = second_best_ref_mv;
1749   bsi->mvp.as_int = best_ref_mv->as_int;
1750   bsi->mvthresh = mvthresh;
1751
1752   for (i = 0; i < 4; i++)
1753     bsi->modes[i] = ZEROMV;
1754
1755   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1756   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1757
1758   // 64 makes this threshold really big effectively
1759   // making it so that we very rarely check mvs on
1760   // segments.   setting this to 1 would make mv thresh
1761   // roughly equal to what it is for macroblocks
1762   label_mv_thresh = 1 * bsi->mvthresh / label_count;
1763
1764   // Segmentation method overheads
1765   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1766     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1767       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1768       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1769       int_mv mode_mv[MB_MODE_COUNT][2];
1770       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1771       PREDICTION_MODE mode_selected = ZEROMV;
1772       int64_t best_rd = INT64_MAX;
1773       const int i = idy * 2 + idx;
1774       int ref;
1775
1776       for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1777         const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1778         frame_mv[ZEROMV][frame].as_int = 0;
1779         vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1780                                       &frame_mv[NEARESTMV][frame],
1781                                       &frame_mv[NEARMV][frame]);
1782       }
1783
1784       // search for the best motion vector on this segment
1785       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1786         const struct buf_2d orig_src = x->plane[0].src;
1787         struct buf_2d orig_pre[2];
1788
1789         mode_idx = INTER_OFFSET(this_mode);
1790         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1791         if (disable_inter_mode_mask & (1 << mode_idx))
1792           continue;
1793
1794         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1795                                 disable_inter_mode_mask,
1796                                 this_mode, mbmi->ref_frame))
1797           continue;
1798
1799         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1800         vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1801                    sizeof(bsi->rdstat[i][mode_idx].ta));
1802         vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1803                    sizeof(bsi->rdstat[i][mode_idx].tl));
1804
1805         // motion search for newmv (single predictor case only)
1806         if (!has_second_rf && this_mode == NEWMV &&
1807             seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1808           MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1809           int step_param = 0;
1810           int further_steps;
1811           int thissme, bestsme = INT_MAX;
1812           int sadpb = x->sadperbit4;
1813           MV mvp_full;
1814           int max_mv;
1815
1816           /* Is the best so far sufficiently good that we cant justify doing
1817            * and new motion search. */
1818           if (best_rd < label_mv_thresh)
1819             break;
1820
1821           if (!is_best_mode(cpi->oxcf.mode)) {
1822             // use previous block's result as next block's MV predictor.
1823             if (i > 0) {
1824               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1825               if (i == 2)
1826                 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1827             }
1828           }
1829           if (i == 0)
1830             max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1831           else
1832             max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1833
1834           if (cpi->sf.auto_mv_step_size && cm->show_frame) {
1835             // Take wtd average of the step_params based on the last frame's
1836             // max mv magnitude and the best ref mvs of the current block for
1837             // the given reference.
1838             step_param = (vp9_init_search_range(cpi, max_mv) +
1839                           cpi->mv_step_param) >> 1;
1840           } else {
1841             step_param = cpi->mv_step_param;
1842           }
1843
1844           mvp_full.row = bsi->mvp.as_mv.row >> 3;
1845           mvp_full.col = bsi->mvp.as_mv.col >> 3;
1846
1847           if (cpi->sf.adaptive_motion_search && cm->show_frame) {
1848             mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1849             mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1850             step_param = MAX(step_param, 8);
1851           }
1852
1853           further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1854           // adjust src pointer for this block
1855           mi_buf_shift(x, i);
1856
1857           vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1858
1859           if (cpi->sf.search_method == HEX) {
1860             bestsme = vp9_hex_search(x, &mvp_full,
1861                                      step_param,
1862                                      sadpb, 1, v_fn_ptr, 1,
1863                                      &bsi->ref_mv[0]->as_mv,
1864                                      new_mv);
1865             if (bestsme < INT_MAX)
1866               bestsme = vp9_get_mvpred_var(x, new_mv,
1867                                            &bsi->ref_mv[0]->as_mv,
1868                                            v_fn_ptr, 1);
1869           } else if (cpi->sf.search_method == SQUARE) {
1870             bestsme = vp9_square_search(x, &mvp_full,
1871                                         step_param,
1872                                         sadpb, 1, v_fn_ptr, 1,
1873                                         &bsi->ref_mv[0]->as_mv,
1874                                         new_mv);
1875             if (bestsme < INT_MAX)
1876               bestsme = vp9_get_mvpred_var(x, new_mv,
1877                                            &bsi->ref_mv[0]->as_mv,
1878                                            v_fn_ptr, 1);
1879           } else if (cpi->sf.search_method == BIGDIA) {
1880             bestsme = vp9_bigdia_search(x, &mvp_full,
1881                                         step_param,
1882                                         sadpb, 1, v_fn_ptr, 1,
1883                                         &bsi->ref_mv[0]->as_mv,
1884                                         new_mv);
1885             if (bestsme < INT_MAX)
1886               bestsme = vp9_get_mvpred_var(x, new_mv,
1887                                            &bsi->ref_mv[0]->as_mv,
1888                                            v_fn_ptr, 1);
1889           } else {
1890             bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1891                                              sadpb, further_steps, 0, v_fn_ptr,
1892                                              &bsi->ref_mv[0]->as_mv,
1893                                              new_mv);
1894           }
1895
1896           // Should we do a full search (best quality only)
1897           if (is_best_mode(cpi->oxcf.mode)) {
1898             int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1899             /* Check if mvp_full is within the range. */
1900             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1901                      x->mv_row_min, x->mv_row_max);
1902             thissme = cpi->full_search_sad(x, &mvp_full,
1903                                            sadpb, 16, v_fn_ptr,
1904                                            &bsi->ref_mv[0]->as_mv,
1905                                            &best_mv->as_mv);
1906             if (thissme < bestsme) {
1907               bestsme = thissme;
1908               *new_mv = best_mv->as_mv;
1909             } else {
1910               // The full search result is actually worse so re-instate the
1911               // previous best vector
1912               best_mv->as_mv = *new_mv;
1913             }
1914           }
1915
1916           if (bestsme < INT_MAX) {
1917             int distortion;
1918             cpi->find_fractional_mv_step(x,
1919                                          new_mv,
1920                                          &bsi->ref_mv[0]->as_mv,
1921                                          cm->allow_high_precision_mv,
1922                                          x->errorperbit, v_fn_ptr,
1923                                          cpi->sf.subpel_force_stop,
1924                                          cpi->sf.subpel_iters_per_step,
1925                                          x->nmvjointcost, x->mvcost,
1926                                          &distortion,
1927                                          &x->pred_sse[mbmi->ref_frame[0]]);
1928
1929             // save motion search result for use in compound prediction
1930             seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
1931           }
1932
1933           if (cpi->sf.adaptive_motion_search)
1934             x->pred_mv[mbmi->ref_frame[0]].as_mv = *new_mv;
1935
1936           // restore src pointers
1937           mi_buf_restore(x, orig_src, orig_pre);
1938         }
1939
1940         if (has_second_rf) {
1941           if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1942               seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1943             continue;
1944         }
1945
1946         if (has_second_rf && this_mode == NEWMV &&
1947             mbmi->interp_filter == EIGHTTAP) {
1948           // adjust src pointers
1949           mi_buf_shift(x, i);
1950           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1951             int rate_mv;
1952             joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1953                                 mi_row, mi_col, seg_mvs[i],
1954                                 &rate_mv);
1955             seg_mvs[i][mbmi->ref_frame[0]].as_int =
1956                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1957             seg_mvs[i][mbmi->ref_frame[1]].as_int =
1958                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1959           }
1960           // restore src pointers
1961           mi_buf_restore(x, orig_src, orig_pre);
1962         }
1963
1964         bsi->rdstat[i][mode_idx].brate =
1965             set_and_cost_bmi_mvs(cpi, xd, i, this_mode, mode_mv[this_mode],
1966                                  frame_mv, seg_mvs[i], bsi->ref_mv,
1967                                  x->nmvjointcost, x->mvcost);
1968
1969         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1970           bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1971               mode_mv[this_mode][ref].as_int;
1972           if (num_4x4_blocks_wide > 1)
1973             bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1974                 mode_mv[this_mode][ref].as_int;
1975           if (num_4x4_blocks_high > 1)
1976             bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1977                 mode_mv[this_mode][ref].as_int;
1978         }
1979
1980         // Trap vectors that reach beyond the UMV borders
1981         if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1982             (has_second_rf &&
1983              mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
1984           continue;
1985
1986         if (filter_idx > 0) {
1987           BEST_SEG_INFO *ref_bsi = bsi_buf;
1988           subpelmv = 0;
1989           have_ref = 1;
1990
1991           for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1992             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1993             have_ref &= mode_mv[this_mode][ref].as_int ==
1994                 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1995           }
1996
1997           if (filter_idx > 1 && !subpelmv && !have_ref) {
1998             ref_bsi = bsi_buf + 1;
1999             have_ref = 1;
2000             for (ref = 0; ref < 1 + has_second_rf; ++ref)
2001               have_ref &= mode_mv[this_mode][ref].as_int ==
2002                   ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2003           }
2004
2005           if (!subpelmv && have_ref &&
2006               ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2007             vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
2008                        sizeof(SEG_RDSTAT));
2009             if (num_4x4_blocks_wide > 1)
2010               bsi->rdstat[i + 1][mode_idx].eobs =
2011                   ref_bsi->rdstat[i + 1][mode_idx].eobs;
2012             if (num_4x4_blocks_high > 1)
2013               bsi->rdstat[i + 2][mode_idx].eobs =
2014                   ref_bsi->rdstat[i + 2][mode_idx].eobs;
2015
2016             if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2017               mode_selected = this_mode;
2018               best_rd = bsi->rdstat[i][mode_idx].brdcost;
2019             }
2020             continue;
2021           }
2022         }
2023
2024         bsi->rdstat[i][mode_idx].brdcost =
2025             encode_inter_mb_segment(cpi, x,
2026                                     bsi->segment_rd - this_segment_rd, i,
2027                                     &bsi->rdstat[i][mode_idx].byrate,
2028                                     &bsi->rdstat[i][mode_idx].bdist,
2029                                     &bsi->rdstat[i][mode_idx].bsse,
2030                                     bsi->rdstat[i][mode_idx].ta,
2031                                     bsi->rdstat[i][mode_idx].tl,
2032                                     mi_row, mi_col);
2033         if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2034           bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2035                                             bsi->rdstat[i][mode_idx].brate, 0);
2036           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2037           bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2038           if (num_4x4_blocks_wide > 1)
2039             bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2040           if (num_4x4_blocks_high > 1)
2041             bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2042         }
2043
2044         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2045           mode_selected = this_mode;
2046           best_rd = bsi->rdstat[i][mode_idx].brdcost;
2047         }
2048       } /*for each 4x4 mode*/
2049
2050       if (best_rd == INT64_MAX) {
2051         int iy, midx;
2052         for (iy = i + 1; iy < 4; ++iy)
2053           for (midx = 0; midx < INTER_MODES; ++midx)
2054             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2055         bsi->segment_rd = INT64_MAX;
2056         return INT64_MAX;;
2057       }
2058
2059       mode_idx = INTER_OFFSET(mode_selected);
2060       vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2061       vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2062
2063       set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected],
2064                            frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2065                            x->mvcost);
2066
2067       br += bsi->rdstat[i][mode_idx].brate;
2068       bd += bsi->rdstat[i][mode_idx].bdist;
2069       block_sse += bsi->rdstat[i][mode_idx].bsse;
2070       segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2071       this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2072
2073       if (this_segment_rd > bsi->segment_rd) {
2074         int iy, midx;
2075         for (iy = i + 1; iy < 4; ++iy)
2076           for (midx = 0; midx < INTER_MODES; ++midx)
2077             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2078         bsi->segment_rd = INT64_MAX;
2079         return INT64_MAX;;
2080       }
2081     }
2082   } /* for each label */
2083
2084   bsi->r = br;
2085   bsi->d = bd;
2086   bsi->segment_yrate = segmentyrate;
2087   bsi->segment_rd = this_segment_rd;
2088   bsi->sse = block_sse;
2089
2090   // update the coding decisions
2091   for (k = 0; k < 4; ++k)
2092     bsi->modes[k] = mi->bmi[k].as_mode;
2093
2094   if (bsi->segment_rd > best_rd)
2095     return INT64_MAX;
2096   /* set it to the best */
2097   for (i = 0; i < 4; i++) {
2098     mode_idx = INTER_OFFSET(bsi->modes[i]);
2099     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2100     if (has_second_ref(mbmi))
2101       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2102     x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2103     mi->bmi[i].as_mode = bsi->modes[i];
2104   }
2105
2106   /*
2107    * used to set mbmi->mv.as_int
2108    */
2109   *returntotrate = bsi->r;
2110   *returndistortion = bsi->d;
2111   *returnyrate = bsi->segment_yrate;
2112   *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2113   *psse = bsi->sse;
2114   mbmi->mode = bsi->modes[3];
2115
2116   return bsi->segment_rd;
2117 }
2118
2119 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2120                     uint8_t *ref_y_buffer, int ref_y_stride,
2121                     int ref_frame, BLOCK_SIZE block_size ) {
2122   MACROBLOCKD *xd = &x->e_mbd;
2123   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2124   int_mv this_mv;
2125   int i;
2126   int zero_seen = 0;
2127   int best_index = 0;
2128   int best_sad = INT_MAX;
2129   int this_sad = INT_MAX;
2130   int max_mv = 0;
2131
2132   uint8_t *src_y_ptr = x->plane[0].src.buf;
2133   uint8_t *ref_y_ptr;
2134   int row_offset, col_offset;
2135   int num_mv_refs = MAX_MV_REF_CANDIDATES +
2136                     (cpi->sf.adaptive_motion_search &&
2137                      cpi->common.show_frame &&
2138                      block_size < cpi->sf.max_partition_size);
2139
2140   int_mv pred_mv[3];
2141   pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
2142   pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
2143   pred_mv[2] = x->pred_mv[ref_frame];
2144
2145   // Get the sad for each candidate reference mv
2146   for (i = 0; i < num_mv_refs; i++) {
2147     this_mv.as_int = pred_mv[i].as_int;
2148
2149     max_mv = MAX(max_mv,
2150                  MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2151     // only need to check zero mv once
2152     if (!this_mv.as_int && zero_seen)
2153       continue;
2154
2155     zero_seen = zero_seen || !this_mv.as_int;
2156
2157     row_offset = this_mv.as_mv.row >> 3;
2158     col_offset = this_mv.as_mv.col >> 3;
2159     ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2160
2161     // Find sad for current vector.
2162     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2163                                            ref_y_ptr, ref_y_stride,
2164                                            0x7fffffff);
2165
2166     // Note if it is the best so far.
2167     if (this_sad < best_sad) {
2168       best_sad = this_sad;
2169       best_index = i;
2170     }
2171   }
2172
2173   // Note the index of the mv that worked best in the reference list.
2174   x->mv_best_ref_index[ref_frame] = best_index;
2175   x->max_mv_context[ref_frame] = max_mv;
2176   x->pred_mv_sad[ref_frame] = best_sad;
2177 }
2178
2179 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
2180                                      const MACROBLOCKD *xd,
2181                                      int segment_id,
2182                                      unsigned int *ref_costs_single,
2183                                      unsigned int *ref_costs_comp,
2184                                      vp9_prob *comp_mode_p) {
2185   int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2186                                              SEG_LVL_REF_FRAME);
2187   if (seg_ref_active) {
2188     vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2189     vpx_memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2190     *comp_mode_p = 128;
2191   } else {
2192     vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
2193     vp9_prob comp_inter_p = 128;
2194
2195     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2196       comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2197       *comp_mode_p = comp_inter_p;
2198     } else {
2199       *comp_mode_p = 128;
2200     }
2201
2202     ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2203
2204     if (cm->reference_mode != COMPOUND_REFERENCE) {
2205       vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2206       vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2207       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2208
2209       if (cm->reference_mode == REFERENCE_MODE_SELECT)
2210         base_cost += vp9_cost_bit(comp_inter_p, 0);
2211
2212       ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2213           ref_costs_single[ALTREF_FRAME] = base_cost;
2214       ref_costs_single[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
2215       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2216       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2217       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2218       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2219     } else {
2220       ref_costs_single[LAST_FRAME]   = 512;
2221       ref_costs_single[GOLDEN_FRAME] = 512;
2222       ref_costs_single[ALTREF_FRAME] = 512;
2223     }
2224     if (cm->reference_mode != SINGLE_REFERENCE) {
2225       vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2226       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2227
2228       if (cm->reference_mode == REFERENCE_MODE_SELECT)
2229         base_cost += vp9_cost_bit(comp_inter_p, 1);
2230
2231       ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
2232       ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2233     } else {
2234       ref_costs_comp[LAST_FRAME]   = 512;
2235       ref_costs_comp[GOLDEN_FRAME] = 512;
2236     }
2237   }
2238 }
2239
2240 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2241                          int mode_index,
2242                          int_mv *ref_mv,
2243                          int_mv *second_ref_mv,
2244                          int64_t comp_pred_diff[REFERENCE_MODES],
2245                          const int64_t tx_size_diff[TX_MODES],
2246                          int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2247   MACROBLOCKD *const xd = &x->e_mbd;
2248
2249   // Take a snapshot of the coding context so it can be
2250   // restored if we decide to encode this way
2251   ctx->skip = x->skip;
2252   ctx->best_mode_index = mode_index;
2253   ctx->mic = *xd->mi[0];
2254
2255   ctx->best_ref_mv[0].as_int = ref_mv->as_int;
2256   ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
2257
2258   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2259   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
2260   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2261
2262   vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2263   vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2264              sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2265 }
2266
2267 static void setup_pred_block(const MACROBLOCKD *xd,
2268                              struct buf_2d dst[MAX_MB_PLANE],
2269                              const YV12_BUFFER_CONFIG *src,
2270                              int mi_row, int mi_col,
2271                              const struct scale_factors *scale,
2272                              const struct scale_factors *scale_uv) {
2273   int i;
2274
2275   dst[0].buf = src->y_buffer;
2276   dst[0].stride = src->y_stride;
2277   dst[1].buf = src->u_buffer;
2278   dst[2].buf = src->v_buffer;
2279   dst[1].stride = dst[2].stride = src->uv_stride;
2280 #if CONFIG_ALPHA
2281   dst[3].buf = src->alpha_buffer;
2282   dst[3].stride = src->alpha_stride;
2283 #endif
2284
2285   // TODO(jkoleszar): Make scale factors per-plane data
2286   for (i = 0; i < MAX_MB_PLANE; i++) {
2287     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2288                      i ? scale_uv : scale,
2289                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2290   }
2291 }
2292
2293 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2294                             const TileInfo *const tile,
2295                             MV_REFERENCE_FRAME ref_frame,
2296                             BLOCK_SIZE block_size,
2297                             int mi_row, int mi_col,
2298                             int_mv frame_nearest_mv[MAX_REF_FRAMES],
2299                             int_mv frame_near_mv[MAX_REF_FRAMES],
2300                             struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2301   const VP9_COMMON *cm = &cpi->common;
2302   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2303   MACROBLOCKD *const xd = &x->e_mbd;
2304   MODE_INFO *const mi = xd->mi[0];
2305   int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
2306   const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2307
2308   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2309   // use the UV scaling factors.
2310   setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2311
2312   // Gets an initial list of candidate vectors from neighbours and orders them
2313   vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
2314
2315   // Candidate refinement carried out at encoder and decoder
2316   vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2317                         &frame_nearest_mv[ref_frame],
2318                         &frame_near_mv[ref_frame]);
2319
2320   // Further refinement that is encode side only to test the top few candidates
2321   // in full and choose the best as the centre point for subsequent searches.
2322   // The current implementation doesn't support scaling.
2323   if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2324     mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
2325             ref_frame, block_size);
2326 }
2327
2328 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
2329                                                    int ref_frame) {
2330   const VP9_COMMON *const cm = &cpi->common;
2331   const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
2332   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
2333   return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
2334 }
2335
2336 int vp9_get_switchable_rate(const VP9_COMP *cpi) {
2337   const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2338   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2339   const int ctx = vp9_get_pred_context_switchable_interp(xd);
2340   return SWITCHABLE_INTERP_RATE_FACTOR *
2341              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
2342 }
2343
2344 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2345                                  BLOCK_SIZE bsize,
2346                                  int mi_row, int mi_col,
2347                                  int_mv *tmp_mv, int *rate_mv) {
2348   MACROBLOCKD *xd = &x->e_mbd;
2349   const VP9_COMMON *cm = &cpi->common;
2350   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2351   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2352   int bestsme = INT_MAX;
2353   int further_steps, step_param;
2354   int sadpb = x->sadperbit16;
2355   MV mvp_full;
2356   int ref = mbmi->ref_frame[0];
2357   MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
2358
2359   int tmp_col_min = x->mv_col_min;
2360   int tmp_col_max = x->mv_col_max;
2361   int tmp_row_min = x->mv_row_min;
2362   int tmp_row_max = x->mv_row_max;
2363
2364   const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
2365                                                                         ref);
2366
2367   MV pred_mv[3];
2368   pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
2369   pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
2370   pred_mv[2] = x->pred_mv[ref].as_mv;
2371
2372   if (scaled_ref_frame) {
2373     int i;
2374     // Swap out the reference frame for a version that's been scaled to
2375     // match the resolution of the current frame, allowing the existing
2376     // motion search code to be used without additional modifications.
2377     for (i = 0; i < MAX_MB_PLANE; i++)
2378       backup_yv12[i] = xd->plane[i].pre[0];
2379
2380     vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2381   }
2382
2383   vp9_set_mv_search_range(x, &ref_mv);
2384
2385   // Work out the size of the first step in the mv step search.
2386   // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2387   if (cpi->sf.auto_mv_step_size && cm->show_frame) {
2388     // Take wtd average of the step_params based on the last frame's
2389     // max mv magnitude and that based on the best ref mvs of the current
2390     // block for the given reference.
2391     step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2392                   cpi->mv_step_param) >> 1;
2393   } else {
2394     step_param = cpi->mv_step_param;
2395   }
2396
2397   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2398       cm->show_frame) {
2399     int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2400                                                        b_width_log2(bsize)));
2401     step_param = MAX(step_param, boffset);
2402   }
2403
2404   if (cpi->sf.adaptive_motion_search) {
2405     int bwl = b_width_log2_lookup[bsize];
2406     int bhl = b_height_log2_lookup[bsize];
2407     int i;
2408     int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
2409
2410     if (tlevel < 5)
2411       step_param += 2;
2412
2413     for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
2414       if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2415         x->pred_mv[ref].as_int = 0;
2416         tmp_mv->as_int = INVALID_MV;
2417
2418         if (scaled_ref_frame) {
2419           int i;
2420           for (i = 0; i < MAX_MB_PLANE; i++)
2421             xd->plane[i].pre[0] = backup_yv12[i];
2422         }
2423         return;
2424       }
2425     }
2426   }
2427
2428   mvp_full = pred_mv[x->mv_best_ref_index[ref]];
2429
2430   mvp_full.col >>= 3;
2431   mvp_full.row >>= 3;
2432
2433   // Further step/diamond searches as necessary
2434   further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2435
2436   if (cpi->sf.search_method == FAST_DIAMOND) {
2437     bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
2438                                   &cpi->fn_ptr[bsize], 1,
2439                                   &ref_mv, &tmp_mv->as_mv);
2440     if (bestsme < INT_MAX)
2441       bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2442                                    &cpi->fn_ptr[bsize], 1);
2443   } else if (cpi->sf.search_method == FAST_HEX) {
2444     bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
2445                                   &cpi->fn_ptr[bsize], 1,
2446                                   &ref_mv, &tmp_mv->as_mv);
2447     if (bestsme < INT_MAX)
2448       bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2449                                    &cpi->fn_ptr[bsize], 1);
2450   } else if (cpi->sf.search_method == HEX) {
2451     bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
2452                              &cpi->fn_ptr[bsize], 1,
2453                              &ref_mv, &tmp_mv->as_mv);
2454     if (bestsme < INT_MAX)
2455       bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2456                                    &cpi->fn_ptr[bsize], 1);
2457   } else if (cpi->sf.search_method == SQUARE) {
2458     bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
2459                                 &cpi->fn_ptr[bsize], 1,
2460                                 &ref_mv, &tmp_mv->as_mv);
2461     if (bestsme < INT_MAX)
2462       bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2463                                    &cpi->fn_ptr[bsize], 1);
2464   } else if (cpi->sf.search_method == BIGDIA) {
2465     bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
2466                                 &cpi->fn_ptr[bsize], 1,
2467                                 &ref_mv, &tmp_mv->as_mv);
2468     if (bestsme < INT_MAX)
2469       bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2470                                    &cpi->fn_ptr[bsize], 1);
2471   } else {
2472     bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2473                                      sadpb, further_steps, 1,
2474                                      &cpi->fn_ptr[bsize],
2475                                      &ref_mv, &tmp_mv->as_mv);
2476   }
2477
2478   x->mv_col_min = tmp_col_min;
2479   x->mv_col_max = tmp_col_max;
2480   x->mv_row_min = tmp_row_min;
2481   x->mv_row_max = tmp_row_max;
2482
2483   if (bestsme < INT_MAX) {
2484     int dis;  /* TODO: use dis in distortion calculation later. */
2485     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
2486                                  cm->allow_high_precision_mv,
2487                                  x->errorperbit,
2488                                  &cpi->fn_ptr[bsize],
2489                                  cpi->sf.subpel_force_stop,
2490                                  cpi->sf.subpel_iters_per_step,
2491                                  x->nmvjointcost, x->mvcost,
2492                                  &dis, &x->pred_sse[ref]);
2493   }
2494   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
2495                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2496
2497   if (cpi->sf.adaptive_motion_search && cm->show_frame)
2498     x->pred_mv[ref].as_int = tmp_mv->as_int;
2499
2500   if (scaled_ref_frame) {
2501     int i;
2502     for (i = 0; i < MAX_MB_PLANE; i++)
2503       xd->plane[i].pre[0] = backup_yv12[i];
2504   }
2505 }
2506
2507 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2508                                 BLOCK_SIZE bsize,
2509                                 int_mv *frame_mv,
2510                                 int mi_row, int mi_col,
2511                                 int_mv single_newmv[MAX_REF_FRAMES],
2512                                 int *rate_mv) {
2513   const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2514   const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2515   MACROBLOCKD *xd = &x->e_mbd;
2516   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2517   const int refs[2] = { mbmi->ref_frame[0],
2518                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2519   int_mv ref_mv[2];
2520   int ite, ref;
2521   // Prediction buffer from second frame.
2522   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2523   const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2524
2525   // Do joint motion search in compound mode to get more accurate mv.
2526   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2527   struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2528   int last_besterr[2] = {INT_MAX, INT_MAX};
2529   const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2530     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2531     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2532   };
2533
2534   for (ref = 0; ref < 2; ++ref) {
2535     ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2536
2537     if (scaled_ref_frame[ref]) {
2538       int i;
2539       // Swap out the reference frame for a version that's been scaled to
2540       // match the resolution of the current frame, allowing the existing
2541       // motion search code to be used without additional modifications.
2542       for (i = 0; i < MAX_MB_PLANE; i++)
2543         backup_yv12[ref][i] = xd->plane[i].pre[ref];
2544       vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
2545                            NULL);
2546     }
2547
2548     frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2549   }
2550
2551   // Allow joint search multiple times iteratively for each ref frame
2552   // and break out the search loop if it couldn't find better mv.
2553   for (ite = 0; ite < 4; ite++) {
2554     struct buf_2d ref_yv12[2];
2555     int bestsme = INT_MAX;
2556     int sadpb = x->sadperbit16;
2557     MV tmp_mv;
2558     int search_range = 3;
2559
2560     int tmp_col_min = x->mv_col_min;
2561     int tmp_col_max = x->mv_col_max;
2562     int tmp_row_min = x->mv_row_min;
2563     int tmp_row_max = x->mv_row_max;
2564     int id = ite % 2;
2565
2566     // Initialized here because of compiler problem in Visual Studio.
2567     ref_yv12[0] = xd->plane[0].pre[0];
2568     ref_yv12[1] = xd->plane[0].pre[1];
2569
2570     // Get pred block from second frame.
2571     vp9_build_inter_predictor(ref_yv12[!id].buf,
2572                               ref_yv12[!id].stride,
2573                               second_pred, pw,
2574                               &frame_mv[refs[!id]].as_mv,
2575                               &xd->block_refs[!id]->sf,
2576                               pw, ph, 0,
2577                               kernel, MV_PRECISION_Q3,
2578                               mi_col * MI_SIZE, mi_row * MI_SIZE);
2579
2580     // Compound motion search on first ref frame.
2581     if (id)
2582       xd->plane[0].pre[0] = ref_yv12[id];
2583     vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
2584
2585     // Use mv result from single mode as mvp.
2586     tmp_mv = frame_mv[refs[id]].as_mv;
2587
2588     tmp_mv.col >>= 3;
2589     tmp_mv.row >>= 3;
2590
2591     // Small-range full-pixel motion search
2592     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2593                                        search_range,
2594                                        &cpi->fn_ptr[bsize],
2595                                        &ref_mv[id].as_mv, second_pred,
2596                                        pw, ph);
2597     if (bestsme < INT_MAX)
2598       bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
2599                                       second_pred, &cpi->fn_ptr[bsize], 1);
2600
2601     x->mv_col_min = tmp_col_min;
2602     x->mv_col_max = tmp_col_max;
2603     x->mv_row_min = tmp_row_min;
2604     x->mv_row_max = tmp_row_max;
2605
2606     if (bestsme < INT_MAX) {
2607       int dis; /* TODO: use dis in distortion calculation later. */
2608       unsigned int sse;
2609       bestsme = cpi->find_fractional_mv_step_comp(
2610           x, &tmp_mv,
2611           &ref_mv[id].as_mv,
2612           cpi->common.allow_high_precision_mv,
2613           x->errorperbit,
2614           &cpi->fn_ptr[bsize],
2615           0, cpi->sf.subpel_iters_per_step,
2616           x->nmvjointcost, x->mvcost,
2617           &dis, &sse, second_pred,
2618           pw, ph);
2619     }
2620
2621     if (id)
2622       xd->plane[0].pre[0] = scaled_first_yv12;
2623
2624     if (bestsme < last_besterr[id]) {
2625       frame_mv[refs[id]].as_mv = tmp_mv;
2626       last_besterr[id] = bestsme;
2627     } else {
2628       break;
2629     }
2630   }
2631
2632   *rate_mv = 0;
2633
2634   for (ref = 0; ref < 2; ++ref) {
2635     if (scaled_ref_frame[ref]) {
2636       // restore the predictor
2637       int i;
2638       for (i = 0; i < MAX_MB_PLANE; i++)
2639         xd->plane[i].pre[ref] = backup_yv12[ref][i];
2640     }
2641
2642     *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2643                                 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2644                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2645   }
2646
2647   vpx_free(second_pred);
2648 }
2649
2650 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2651                                    uint8_t *orig_dst[MAX_MB_PLANE],
2652                                    int orig_dst_stride[MAX_MB_PLANE]) {
2653   int i;
2654   for (i = 0; i < MAX_MB_PLANE; i++) {
2655     xd->plane[i].dst.buf = orig_dst[i];
2656     xd->plane[i].dst.stride = orig_dst_stride[i];
2657   }
2658 }
2659
2660 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2661                                  BLOCK_SIZE bsize,
2662                                  int64_t txfm_cache[],
2663                                  int *rate2, int64_t *distortion,
2664                                  int *skippable,
2665                                  int *rate_y, int64_t *distortion_y,
2666                                  int *rate_uv, int64_t *distortion_uv,
2667                                  int *mode_excluded, int *disable_skip,
2668                                  INTERP_FILTER *best_filter,
2669                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
2670                                  int mi_row, int mi_col,
2671                                  int_mv single_newmv[MAX_REF_FRAMES],
2672                                  int64_t *psse,
2673                                  const int64_t ref_best_rd) {
2674   VP9_COMMON *cm = &cpi->common;
2675   RD_OPT *rd_opt = &cpi->rd;
2676   MACROBLOCKD *xd = &x->e_mbd;
2677   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2678   const int is_comp_pred = has_second_ref(mbmi);
2679   const int num_refs = is_comp_pred ? 2 : 1;
2680   const int this_mode = mbmi->mode;
2681   int_mv *frame_mv = mode_mv[this_mode];
2682   int i;
2683   int refs[2] = { mbmi->ref_frame[0],
2684     (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2685   int_mv cur_mv[2];
2686   int64_t this_rd = 0;
2687   DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2688   int pred_exists = 0;
2689   int intpel_mv;
2690   int64_t rd, best_rd = INT64_MAX;
2691   int best_needs_copy = 0;
2692   uint8_t *orig_dst[MAX_MB_PLANE];
2693   int orig_dst_stride[MAX_MB_PLANE];
2694   int rs = 0;
2695
2696   if (is_comp_pred) {
2697     if (frame_mv[refs[0]].as_int == INVALID_MV ||
2698         frame_mv[refs[1]].as_int == INVALID_MV)
2699       return INT64_MAX;
2700   }
2701
2702   if (this_mode == NEWMV) {
2703     int rate_mv;
2704     if (is_comp_pred) {
2705       // Initialize mv using single prediction mode result.
2706       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2707       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2708
2709       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2710         joint_motion_search(cpi, x, bsize, frame_mv,
2711                             mi_row, mi_col, single_newmv, &rate_mv);
2712       } else {
2713         rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2714                                    &mbmi->ref_mvs[refs[0]][0].as_mv,
2715                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2716         rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2717                                    &mbmi->ref_mvs[refs[1]][0].as_mv,
2718                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2719       }
2720       *rate2 += rate_mv;
2721     } else {
2722       int_mv tmp_mv;
2723       single_motion_search(cpi, x, bsize, mi_row, mi_col,
2724                            &tmp_mv, &rate_mv);
2725       if (tmp_mv.as_int == INVALID_MV)
2726         return INT64_MAX;
2727       *rate2 += rate_mv;
2728       frame_mv[refs[0]].as_int =
2729           xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2730       single_newmv[refs[0]].as_int = tmp_mv.as_int;
2731     }
2732   }
2733
2734   for (i = 0; i < num_refs; ++i) {
2735     cur_mv[i] = frame_mv[refs[i]];
2736     // Clip "next_nearest" so that it does not extend to far out of image
2737     if (this_mode != NEWMV)
2738       clamp_mv2(&cur_mv[i].as_mv, xd);
2739
2740     if (mv_check_bounds(x, &cur_mv[i].as_mv))
2741       return INT64_MAX;
2742     mbmi->mv[i].as_int = cur_mv[i].as_int;
2743   }
2744
2745   // do first prediction into the destination buffer. Do the next
2746   // prediction into a temporary buffer. Then keep track of which one
2747   // of these currently holds the best predictor, and use the other
2748   // one for future predictions. In the end, copy from tmp_buf to
2749   // dst if necessary.
2750   for (i = 0; i < MAX_MB_PLANE; i++) {
2751     orig_dst[i] = xd->plane[i].dst.buf;
2752     orig_dst_stride[i] = xd->plane[i].dst.stride;
2753   }
2754
2755   /* We don't include the cost of the second reference here, because there
2756    * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2757    * words if you present them in that order, the second one is always known
2758    * if the first is known */
2759   *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2760
2761   if (!(*mode_excluded))
2762     *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
2763                                   : cm->reference_mode == COMPOUND_REFERENCE;
2764
2765   pred_exists = 0;
2766   // Are all MVs integer pel for Y and UV
2767   intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2768   if (is_comp_pred)
2769     intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2770
2771   // Search for best switchable filter by checking the variance of
2772   // pred error irrespective of whether the filter will be used
2773   rd_opt->mask_filter = 0;
2774   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2775     rd_opt->filter_cache[i] = INT64_MAX;
2776
2777   if (cm->interp_filter != BILINEAR) {
2778     *best_filter = EIGHTTAP;
2779     if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2780       *best_filter = EIGHTTAP;
2781     } else {
2782       int newbest;
2783       int tmp_rate_sum = 0;
2784       int64_t tmp_dist_sum = 0;
2785
2786       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2787         int j;
2788         int64_t rs_rd;
2789         mbmi->interp_filter = i;
2790         rs = vp9_get_switchable_rate(cpi);
2791         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2792
2793         if (i > 0 && intpel_mv) {
2794           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2795           rd_opt->filter_cache[i] = rd;
2796           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2797               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2798           if (cm->interp_filter == SWITCHABLE)
2799             rd += rs_rd;
2800           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2801         } else {
2802           int rate_sum = 0;
2803           int64_t dist_sum = 0;
2804           if ((cm->interp_filter == SWITCHABLE &&
2805                (!i || best_needs_copy)) ||
2806               (cm->interp_filter != SWITCHABLE &&
2807                (cm->interp_filter == mbmi->interp_filter ||
2808                 (i == 0 && intpel_mv)))) {
2809             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2810           } else {
2811             for (j = 0; j < MAX_MB_PLANE; j++) {
2812               xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2813               xd->plane[j].dst.stride = 64;
2814             }
2815           }
2816           vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2817           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2818
2819           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2820           rd_opt->filter_cache[i] = rd;
2821           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2822               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2823           if (cm->interp_filter == SWITCHABLE)
2824             rd += rs_rd;
2825           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2826
2827           if (i == 0 && intpel_mv) {
2828             tmp_rate_sum = rate_sum;
2829             tmp_dist_sum = dist_sum;
2830           }
2831         }
2832
2833         if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2834           if (rd / 2 > ref_best_rd) {
2835             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2836             return INT64_MAX;
2837           }
2838         }
2839         newbest = i == 0 || rd < best_rd;
2840
2841         if (newbest) {
2842           best_rd = rd;
2843           *best_filter = mbmi->interp_filter;
2844           if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2845             best_needs_copy = !best_needs_copy;
2846         }
2847
2848         if ((cm->interp_filter == SWITCHABLE && newbest) ||
2849             (cm->interp_filter != SWITCHABLE &&
2850              cm->interp_filter == mbmi->interp_filter)) {
2851           pred_exists = 1;
2852         }
2853       }
2854       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2855     }
2856   }
2857   // Set the appropriate filter
2858   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2859       cm->interp_filter : *best_filter;
2860   rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
2861
2862   if (pred_exists) {
2863     if (best_needs_copy) {
2864       // again temporarily set the buffers to local memory to prevent a memcpy
2865       for (i = 0; i < MAX_MB_PLANE; i++) {
2866         xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2867         xd->plane[i].dst.stride = 64;
2868       }
2869     }
2870   } else {
2871     // Handles the special case when a filter that is not in the
2872     // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2873     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2874   }
2875
2876   if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2877     int tmp_rate;
2878     int64_t tmp_dist;
2879     model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2880     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2881     // if current pred_error modeled rd is substantially more than the best
2882     // so far, do not bother doing full rd
2883     if (rd / 2 > ref_best_rd) {
2884       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2885       return INT64_MAX;
2886     }
2887   }
2888
2889   if (cm->interp_filter == SWITCHABLE)
2890     *rate2 += vp9_get_switchable_rate(cpi);
2891
2892   if (!is_comp_pred) {
2893     if (!x->in_active_map) {
2894       if (psse)
2895         *psse = 0;
2896       *distortion = 0;
2897       x->skip = 1;
2898     } else if (cpi->allow_encode_breakout && x->encode_breakout) {
2899       const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2900       const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2901       unsigned int var, sse;
2902       // Skipping threshold for ac.
2903       unsigned int thresh_ac;
2904       // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
2905       // Use extreme low threshold for static frames to limit skipping.
2906       const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2907                                       ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2908       // The encode_breakout input
2909       const unsigned int min_thresh =
2910           MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2911
2912       // Calculate threshold according to dequant value.
2913       thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2914       thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2915
2916       var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2917                                    xd->plane[0].dst.buf,
2918                                    xd->plane[0].dst.stride, &sse);
2919
2920       // Adjust threshold according to partition size.
2921       thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2922           b_height_log2_lookup[bsize]);
2923
2924       // Y skipping condition checking
2925       if (sse < thresh_ac || sse == 0) {
2926         // Skipping threshold for dc
2927         unsigned int thresh_dc;
2928
2929         thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2930
2931         // dc skipping checking
2932         if ((sse - var) < thresh_dc || sse == var) {
2933           unsigned int sse_u, sse_v;
2934           unsigned int var_u, var_v;
2935
2936           var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2937                                           x->plane[1].src.stride,
2938                                           xd->plane[1].dst.buf,
2939                                           xd->plane[1].dst.stride, &sse_u);
2940
2941           // U skipping condition checking
2942           if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2943               (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2944             var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2945                                             x->plane[2].src.stride,
2946                                             xd->plane[2].dst.buf,
2947                                             xd->plane[2].dst.stride, &sse_v);
2948
2949             // V skipping condition checking
2950             if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2951                 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2952               x->skip = 1;
2953
2954               // The cost of skip bit needs to be added.
2955               *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2956
2957               // Scaling factor for SSE from spatial domain to frequency domain
2958               // is 16. Adjust distortion accordingly.
2959               *distortion_uv = (sse_u + sse_v) << 4;
2960               *distortion = (sse << 4) + *distortion_uv;
2961
2962               *disable_skip = 1;
2963               this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2964             }
2965           }
2966         }
2967       }
2968     }
2969   }
2970
2971   if (!x->skip) {
2972     int skippable_y, skippable_uv;
2973     int64_t sseuv = INT64_MAX;
2974     int64_t rdcosty = INT64_MAX;
2975
2976     // Y cost and distortion
2977     inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2978                           bsize, txfm_cache, ref_best_rd);
2979
2980     if (*rate_y == INT_MAX) {
2981       *rate2 = INT_MAX;
2982       *distortion = INT64_MAX;
2983       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2984       return INT64_MAX;
2985     }
2986
2987     *rate2 += *rate_y;
2988     *distortion += *distortion_y;
2989
2990     rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2991     rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2992
2993     super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2994                      bsize, ref_best_rd - rdcosty);
2995     if (*rate_uv == INT_MAX) {
2996       *rate2 = INT_MAX;
2997       *distortion = INT64_MAX;
2998       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2999       return INT64_MAX;
3000     }
3001
3002     *psse += sseuv;
3003     *rate2 += *rate_uv;
3004     *distortion += *distortion_uv;
3005     *skippable = skippable_y && skippable_uv;
3006   }
3007
3008   restore_dst_buf(xd, orig_dst, orig_dst_stride);
3009   return this_rd;  // if 0, this will be re-calculated by caller
3010 }
3011
3012 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3013                            int max_plane) {
3014   struct macroblock_plane *const p = x->plane;
3015   struct macroblockd_plane *const pd = x->e_mbd.plane;
3016   int i;
3017
3018   for (i = 0; i < max_plane; ++i) {
3019     p[i].coeff    = ctx->coeff_pbuf[i][1];
3020     p[i].qcoeff  = ctx->qcoeff_pbuf[i][1];
3021     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3022     p[i].eobs    = ctx->eobs_pbuf[i][1];
3023
3024     ctx->coeff_pbuf[i][1]   = ctx->coeff_pbuf[i][0];
3025     ctx->qcoeff_pbuf[i][1]  = ctx->qcoeff_pbuf[i][0];
3026     ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3027     ctx->eobs_pbuf[i][1]    = ctx->eobs_pbuf[i][0];
3028
3029     ctx->coeff_pbuf[i][0]   = p[i].coeff;
3030     ctx->qcoeff_pbuf[i][0]  = p[i].qcoeff;
3031     ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3032     ctx->eobs_pbuf[i][0]    = p[i].eobs;
3033   }
3034 }
3035
3036 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3037                                int *returnrate, int64_t *returndist,
3038                                BLOCK_SIZE bsize,
3039                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3040   VP9_COMMON *const cm = &cpi->common;
3041   MACROBLOCKD *const xd = &x->e_mbd;
3042   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3043   int y_skip = 0, uv_skip = 0;
3044   int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3045   TX_SIZE max_uv_tx_size;
3046   x->skip_encode = 0;
3047   ctx->skip = 0;
3048   xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3049
3050   if (bsize >= BLOCK_8X8) {
3051     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3052                                &dist_y, &y_skip, bsize, tx_cache,
3053                                best_rd) >= best_rd) {
3054       *returnrate = INT_MAX;
3055       return;
3056     }
3057     max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
3058     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3059                             &dist_uv, &uv_skip, bsize, max_uv_tx_size);
3060   } else {
3061     y_skip = 0;
3062     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3063                                      &dist_y, best_rd) >= best_rd) {
3064       *returnrate = INT_MAX;
3065       return;
3066     }
3067     max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
3068     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3069                             &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
3070   }
3071
3072   if (y_skip && uv_skip) {
3073     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3074                   vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3075     *returndist = dist_y + dist_uv;
3076     vp9_zero(ctx->tx_rd_diff);
3077   } else {
3078     int i;
3079     *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3080     *returndist = dist_y + dist_uv;
3081     if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3082       for (i = 0; i < TX_MODES; i++) {
3083         if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3084           ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3085         else
3086           ctx->tx_rd_diff[i] = 0;
3087       }
3088   }
3089
3090   ctx->mic = *xd->mi[0];
3091 }
3092
3093 static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
3094                                       int thresh_fact) {
3095     return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
3096 }
3097
3098 // Updating rd_thresh_freq_fact[] here means that the different
3099 // partition/block sizes are handled independently based on the best
3100 // choice for the current partition. It may well be better to keep a scaled
3101 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3102 // combination that wins out.
3103 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
3104                                   int best_mode_index) {
3105   if (cpi->sf.adaptive_rd_thresh > 0) {
3106     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
3107     int mode;
3108     for (mode = 0; mode < top_mode; ++mode) {
3109       int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
3110
3111       if (mode == best_mode_index) {
3112         *fact -= (*fact >> 3);
3113       } else {
3114         *fact = MIN(*fact + RD_THRESH_INC,
3115                     cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
3116       }
3117     }
3118   }
3119 }
3120
3121 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3122                                   const TileInfo *const tile,
3123                                   int mi_row, int mi_col,
3124                                   int *returnrate,
3125                                   int64_t *returndistortion,
3126                                   BLOCK_SIZE bsize,
3127                                   PICK_MODE_CONTEXT *ctx,
3128                                   int64_t best_rd_so_far) {
3129   VP9_COMMON *const cm = &cpi->common;
3130   RD_OPT *const rd_opt = &cpi->rd;
3131   MACROBLOCKD *const xd = &x->e_mbd;
3132   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3133   const struct segmentation *const seg = &cm->seg;
3134   PREDICTION_MODE this_mode;
3135   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3136   unsigned char segment_id = mbmi->segment_id;
3137   int comp_pred, i;
3138   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3139   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3140   int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3141   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3142                                     VP9_ALT_FLAG };
3143   int64_t best_rd = best_rd_so_far;
3144   int64_t best_tx_rd[TX_MODES];
3145   int64_t best_tx_diff[TX_MODES];
3146   int64_t best_pred_diff[REFERENCE_MODES];
3147   int64_t best_pred_rd[REFERENCE_MODES];
3148   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3149   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3150   MB_MODE_INFO best_mbmode = { 0 };
3151   int mode_index, best_mode_index = -1;
3152   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3153   vp9_prob comp_mode_p;
3154   int64_t best_intra_rd = INT64_MAX;
3155   int64_t best_inter_rd = INT64_MAX;
3156   PREDICTION_MODE best_intra_mode = DC_PRED;
3157   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3158   INTERP_FILTER tmp_best_filter = SWITCHABLE;
3159   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3160   int64_t dist_uv[TX_SIZES];
3161   int skip_uv[TX_SIZES];
3162   PREDICTION_MODE mode_uv[TX_SIZES];
3163   int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3164   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3165   const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3166   const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3167   int best_skip2 = 0;
3168   int mode_skip_mask = 0;
3169   int mode_skip_start = cpi->sf.mode_skip_start + 1;
3170   const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3171   const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
3172   const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
3173   const int intra_y_mode_mask =
3174       cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3175   int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
3176
3177   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3178
3179   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3180                            &comp_mode_p);
3181
3182   for (i = 0; i < REFERENCE_MODES; ++i)
3183     best_pred_rd[i] = INT64_MAX;
3184   for (i = 0; i < TX_MODES; i++)
3185     best_tx_rd[i] = INT64_MAX;
3186   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3187     best_filter_rd[i] = INT64_MAX;
3188   for (i = 0; i < TX_SIZES; i++)
3189     rate_uv_intra[i] = INT_MAX;
3190   for (i = 0; i < MAX_REF_FRAMES; ++i)
3191     x->pred_sse[i] = INT_MAX;
3192
3193   *returnrate = INT_MAX;
3194
3195   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3196     x->pred_mv_sad[ref_frame] = INT_MAX;
3197     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3198       vp9_setup_buffer_inter(cpi, x, tile,
3199                              ref_frame, bsize, mi_row, mi_col,
3200                              frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3201     }
3202     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3203     frame_mv[ZEROMV][ref_frame].as_int = 0;
3204   }
3205
3206   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3207     // All modes from vp9_mode_order that use this frame as any ref
3208     static const int ref_frame_mask_all[] = {
3209         0x0, 0x123291, 0x25c444, 0x39b722
3210     };
3211     // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
3212     // this frame as their primary ref
3213     static const int ref_frame_mask_fixedmv[] = {
3214         0x0, 0x121281, 0x24c404, 0x080102
3215     };
3216     if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3217       // Skip modes for missing references
3218       mode_skip_mask |= ref_frame_mask_all[ref_frame];
3219     } else if (cpi->sf.reference_masking) {
3220       for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3221         // Skip fixed mv modes for poor references
3222         if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3223           mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
3224           break;
3225         }
3226       }
3227     }
3228     // If the segment reference frame feature is enabled....
3229     // then do nothing if the current ref frame is not allowed..
3230     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3231         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3232       mode_skip_mask |= ref_frame_mask_all[ref_frame];
3233     }
3234   }
3235
3236   // If the segment skip feature is enabled....
3237   // then do nothing if the current mode is not allowed..
3238   if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
3239     const int inter_non_zero_mode_mask = 0x1F7F7;
3240     mode_skip_mask |= inter_non_zero_mode_mask;
3241   }
3242
3243   // Disable this drop out case if the ref frame
3244   // segment level feature is enabled for this segment. This is to
3245   // prevent the possibility that we end up unable to pick any mode.
3246   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3247     // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3248     // unless ARNR filtering is enabled in which case we want
3249     // an unfiltered alternative. We allow near/nearest as well
3250     // because they may result in zero-zero MVs but be cheaper.
3251     if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3252       mode_skip_mask =
3253           ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
3254       if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3255         mode_skip_mask |= (1 << THR_NEARA);
3256       if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3257         mode_skip_mask |= (1 << THR_NEARESTA);
3258     }
3259   }
3260
3261   // TODO(JBB): This is to make up for the fact that we don't have sad
3262   // functions that work when the block size reads outside the umv.  We
3263   // should fix this either by making the motion search just work on
3264   // a representative block in the boundary ( first ) and then implement a
3265   // function that does sads when inside the border..
3266   if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
3267     const int new_modes_mask =
3268         (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
3269         (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
3270     mode_skip_mask |= new_modes_mask;
3271   }
3272
3273   if (bsize > cpi->sf.max_intra_bsize) {
3274     mode_skip_mask |= 0xFF30808;
3275   }
3276
3277   if (!x->in_active_map) {
3278     int mode_index;
3279     assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
3280     if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
3281       mode_index = THR_NEARESTMV;
3282     else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
3283       mode_index = THR_NEARMV;
3284     else
3285       mode_index = THR_ZEROMV;
3286     mode_skip_mask = ~(1 << mode_index);
3287     mode_skip_start = MAX_MODES;
3288     disable_inter_mode_mask = 0;
3289   }
3290
3291   for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3292     int mode_excluded = 0;
3293     int64_t this_rd = INT64_MAX;
3294     int disable_skip = 0;
3295     int compmode_cost = 0;
3296     int rate2 = 0, rate_y = 0, rate_uv = 0;
3297     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3298     int skippable = 0;
3299     int64_t tx_cache[TX_MODES];
3300     int i;
3301     int this_skip2 = 0;
3302     int64_t total_sse = INT64_MAX;
3303     int early_term = 0;
3304
3305     // Look at the reference frame of the best mode so far and set the
3306     // skip mask to look at a subset of the remaining modes.
3307     if (mode_index == mode_skip_start && best_mode_index >= 0) {
3308       switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
3309         case INTRA_FRAME:
3310           break;
3311         case LAST_FRAME:
3312           mode_skip_mask |= LAST_FRAME_MODE_MASK;
3313           break;
3314         case GOLDEN_FRAME:
3315           mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
3316           break;
3317         case ALTREF_FRAME:
3318           mode_skip_mask |= ALT_REF_MODE_MASK;
3319           break;
3320         case NONE:
3321         case MAX_REF_FRAMES:
3322           assert(0 && "Invalid Reference frame");
3323       }
3324     }
3325     if (mode_skip_mask & (1 << mode_index))
3326       continue;
3327
3328     // Test best rd so far against threshold for trying this mode.
3329     if (rd_less_than_thresh(best_rd, rd_threshes[mode_index],
3330         rd_thresh_freq_fact[mode_index]))
3331       continue;
3332
3333     this_mode = vp9_mode_order[mode_index].mode;
3334     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3335     if (ref_frame != INTRA_FRAME &&
3336         disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
3337       continue;
3338     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3339
3340     comp_pred = second_ref_frame > INTRA_FRAME;
3341     if (comp_pred) {
3342       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3343           best_mode_index >=0 &&
3344           vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3345         continue;
3346       if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3347           ref_frame != best_inter_ref_frame &&
3348           second_ref_frame != best_inter_ref_frame)
3349         continue;
3350       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3351     } else {
3352       if (ref_frame != INTRA_FRAME)
3353         mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3354     }
3355
3356     if (ref_frame == INTRA_FRAME) {
3357       if (!(intra_y_mode_mask & (1 << this_mode)))
3358         continue;
3359       if (this_mode != DC_PRED) {
3360         // Disable intra modes other than DC_PRED for blocks with low variance
3361         // Threshold for intra skipping based on source variance
3362         // TODO(debargha): Specialize the threshold for super block sizes
3363         const unsigned int skip_intra_var_thresh = 64;
3364         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3365             x->source_variance < skip_intra_var_thresh)
3366           continue;
3367         // Only search the oblique modes if the best so far is
3368         // one of the neighboring directional modes
3369         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3370             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3371           if (best_mode_index >= 0 &&
3372               vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
3373             continue;
3374         }
3375         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3376           if (conditional_skipintra(this_mode, best_intra_mode))
3377               continue;
3378         }
3379       }
3380     } else {
3381       if (x->in_active_map &&
3382           !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
3383         const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
3384         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
3385                                 disable_inter_mode_mask, this_mode, ref_frames))
3386           continue;
3387       }
3388     }
3389
3390     mbmi->mode = this_mode;
3391     mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
3392     mbmi->ref_frame[0] = ref_frame;
3393     mbmi->ref_frame[1] = second_ref_frame;
3394     // Evaluate all sub-pel filters irrespective of whether we can use
3395     // them for this frame.
3396     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3397                                                           : cm->interp_filter;
3398     x->skip = 0;
3399     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3400
3401     // Select prediction reference frames.
3402     for (i = 0; i < MAX_MB_PLANE; i++) {
3403       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3404       if (comp_pred)
3405         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3406     }
3407
3408     for (i = 0; i < TX_MODES; ++i)
3409       tx_cache[i] = INT64_MAX;
3410
3411 #ifdef MODE_TEST_HIT_STATS
3412     // TEST/DEBUG CODE
3413     // Keep a rcord of the number of test hits at each size
3414     cpi->mode_test_hits[bsize]++;
3415 #endif
3416
3417     if (ref_frame == INTRA_FRAME) {
3418       TX_SIZE uv_tx;
3419       intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3420                             bsize, tx_cache, best_rd);
3421
3422       if (rate_y == INT_MAX)
3423         continue;
3424
3425       uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3426       if (rate_uv_intra[uv_tx] == INT_MAX) {
3427         choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3428                              &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3429                              &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3430       }
3431
3432       rate_uv = rate_uv_tokenonly[uv_tx];
3433       distortion_uv = dist_uv[uv_tx];
3434       skippable = skippable && skip_uv[uv_tx];
3435       mbmi->uv_mode = mode_uv[uv_tx];
3436
3437       rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3438       if (this_mode != DC_PRED && this_mode != TM_PRED)
3439         rate2 += intra_cost_penalty;
3440       distortion2 = distortion_y + distortion_uv;
3441     } else {
3442       this_rd = handle_inter_mode(cpi, x, bsize,
3443                                   tx_cache,
3444                                   &rate2, &distortion2, &skippable,
3445                                   &rate_y, &distortion_y,
3446                                   &rate_uv, &distortion_uv,
3447                                   &mode_excluded, &disable_skip,
3448                                   &tmp_best_filter, frame_mv,
3449                                   mi_row, mi_col,
3450                                   single_newmv, &total_sse, best_rd);
3451       if (this_rd == INT64_MAX)
3452         continue;
3453
3454       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3455
3456       if (cm->reference_mode == REFERENCE_MODE_SELECT)
3457         rate2 += compmode_cost;
3458     }
3459
3460     // Estimate the reference frame signaling cost and add it
3461     // to the rolling cost variable.
3462     if (comp_pred) {
3463       rate2 += ref_costs_comp[ref_frame];
3464     } else {
3465       rate2 += ref_costs_single[ref_frame];
3466     }
3467
3468     if (!disable_skip) {
3469       // Test for the condition where skip block will be activated
3470       // because there are no non zero coefficients and make any
3471       // necessary adjustment for rate. Ignore if skip is coded at
3472       // segment level as the cost wont have been added in.
3473       // Is Mb level skip allowed (i.e. not coded at segment level).
3474       const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3475                                                          SEG_LVL_SKIP);
3476
3477       if (skippable) {
3478         // Back out the coefficient coding costs
3479         rate2 -= (rate_y + rate_uv);
3480         // for best yrd calculation
3481         rate_uv = 0;
3482
3483         if (mb_skip_allowed) {
3484           int prob_skip_cost;
3485
3486           // Cost the skip mb case
3487           vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3488           if (skip_prob) {
3489             prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3490             rate2 += prob_skip_cost;
3491           }
3492         }
3493       } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3494         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3495             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3496           // Add in the cost of the no skip flag.
3497           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3498         } else {
3499           // FIXME(rbultje) make this work for splitmv also
3500           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3501           distortion2 = total_sse;
3502           assert(total_sse >= 0);
3503           rate2 -= (rate_y + rate_uv);
3504           rate_y = 0;
3505           rate_uv = 0;
3506           this_skip2 = 1;
3507         }
3508       } else if (mb_skip_allowed) {
3509         // Add in the cost of the no skip flag.
3510         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3511       }
3512
3513       // Calculate the final RD estimate for this mode.
3514       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3515     }
3516
3517     if (ref_frame == INTRA_FRAME) {
3518     // Keep record of best intra rd
3519       if (this_rd < best_intra_rd) {
3520         best_intra_rd = this_rd;
3521         best_intra_mode = mbmi->mode;
3522       }
3523     } else {
3524       // Keep record of best inter rd with single reference
3525       if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) {
3526         best_inter_rd = this_rd;
3527         best_inter_ref_frame = ref_frame;
3528       }
3529     }
3530
3531     if (!disable_skip && ref_frame == INTRA_FRAME) {
3532       for (i = 0; i < REFERENCE_MODES; ++i)
3533         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3534       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3535         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3536     }
3537
3538     // Store the respective mode distortions for later use.
3539     if (mode_distortions[this_mode] == -1
3540         || distortion2 < mode_distortions[this_mode]) {
3541       mode_distortions[this_mode] = distortion2;
3542     }
3543
3544     // Did this mode help.. i.e. is it the new best mode
3545     if (this_rd < best_rd || x->skip) {
3546       int max_plane = MAX_MB_PLANE;
3547       if (!mode_excluded) {
3548         // Note index of best mode so far
3549         best_mode_index = mode_index;
3550
3551         if (ref_frame == INTRA_FRAME) {
3552           /* required for left and above block mv */
3553           mbmi->mv[0].as_int = 0;
3554           max_plane = 1;
3555         }
3556
3557         *returnrate = rate2;
3558         *returndistortion = distortion2;
3559         best_rd = this_rd;
3560         best_mbmode = *mbmi;
3561         best_skip2 = this_skip2;
3562         if (!x->select_txfm_size)
3563           swap_block_ptr(x, ctx, max_plane);
3564         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3565                    sizeof(uint8_t) * ctx->num_4x4_blk);
3566
3567         // TODO(debargha): enhance this test with a better distortion prediction
3568         // based on qp, activity mask and history
3569         if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3570             (mode_index > MIN_EARLY_TERM_INDEX)) {
3571           const int qstep = xd->plane[0].dequant[1];
3572           // TODO(debargha): Enhance this by specializing for each mode_index
3573           int scale = 4;
3574           if (x->source_variance < UINT_MAX) {
3575             const int var_adjust = (x->source_variance < 16);
3576             scale -= var_adjust;
3577           }
3578           if (ref_frame > INTRA_FRAME &&
3579               distortion2 * scale < qstep * qstep) {
3580             early_term = 1;
3581           }
3582         }
3583       }
3584     }
3585
3586     /* keep record of best compound/single-only prediction */
3587     if (!disable_skip && ref_frame != INTRA_FRAME) {
3588       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3589
3590       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3591         single_rate = rate2 - compmode_cost;
3592         hybrid_rate = rate2;
3593       } else {
3594         single_rate = rate2;
3595         hybrid_rate = rate2 + compmode_cost;
3596       }
3597
3598       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3599       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3600
3601       if (!comp_pred) {
3602         if (single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3603           best_pred_rd[SINGLE_REFERENCE] = single_rd;
3604         }
3605       } else {
3606         if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3607           best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3608         }
3609       }
3610       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3611         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3612
3613       /* keep record of best filter type */
3614       if (!mode_excluded && cm->interp_filter != BILINEAR) {
3615         int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3616                               SWITCHABLE_FILTERS : cm->interp_filter];
3617
3618         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3619           int64_t adj_rd;
3620           if (ref == INT64_MAX)
3621             adj_rd = 0;
3622           else if (rd_opt->filter_cache[i] == INT64_MAX)
3623             // when early termination is triggered, the encoder does not have
3624             // access to the rate-distortion cost. it only knows that the cost
3625             // should be above the maximum valid value. hence it takes the known
3626             // maximum plus an arbitrary constant as the rate-distortion cost.
3627             adj_rd = rd_opt->mask_filter - ref + 10;
3628           else
3629             adj_rd = rd_opt->filter_cache[i] - ref;
3630
3631           adj_rd += this_rd;
3632           best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3633         }
3634       }
3635     }
3636
3637     /* keep record of best txfm size */
3638     if (bsize < BLOCK_32X32) {
3639       if (bsize < BLOCK_16X16)
3640         tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3641
3642       tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3643     }
3644     if (!mode_excluded && this_rd != INT64_MAX) {
3645       for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3646         int64_t adj_rd = INT64_MAX;
3647         adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3648
3649         if (adj_rd < best_tx_rd[i])
3650           best_tx_rd[i] = adj_rd;
3651       }
3652     }
3653
3654     if (early_term)
3655       break;
3656
3657     if (x->skip && !comp_pred)
3658       break;
3659   }
3660
3661   if (best_mode_index < 0 || best_rd >= best_rd_so_far)
3662     return INT64_MAX;
3663
3664   // If we used an estimate for the uv intra rd in the loop above...
3665   if (cpi->sf.use_uv_intra_rd_estimate) {
3666     // Do Intra UV best rd mode selection if best mode choice above was intra.
3667     if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
3668       TX_SIZE uv_tx_size;
3669       *mbmi = best_mbmode;
3670       uv_tx_size = get_uv_tx_size(mbmi);
3671       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3672                               &rate_uv_tokenonly[uv_tx_size],
3673                               &dist_uv[uv_tx_size],
3674                               &skip_uv[uv_tx_size],
3675                               bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3676                               uv_tx_size);
3677     }
3678   }
3679
3680   assert((cm->interp_filter == SWITCHABLE) ||
3681          (cm->interp_filter == best_mbmode.interp_filter) ||
3682          !is_inter_block(&best_mbmode));
3683
3684   update_rd_thresh_fact(cpi, bsize, best_mode_index);
3685
3686   // macroblock modes
3687   *mbmi = best_mbmode;
3688   x->skip |= best_skip2;
3689
3690   for (i = 0; i < REFERENCE_MODES; ++i) {
3691     if (best_pred_rd[i] == INT64_MAX)
3692       best_pred_diff[i] = INT_MIN;
3693     else
3694       best_pred_diff[i] = best_rd - best_pred_rd[i];
3695   }
3696
3697   if (!x->skip) {
3698     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3699       if (best_filter_rd[i] == INT64_MAX)
3700         best_filter_diff[i] = 0;
3701       else
3702         best_filter_diff[i] = best_rd - best_filter_rd[i];
3703     }
3704     if (cm->interp_filter == SWITCHABLE)
3705       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3706     for (i = 0; i < TX_MODES; i++) {
3707       if (best_tx_rd[i] == INT64_MAX)
3708         best_tx_diff[i] = 0;
3709       else
3710         best_tx_diff[i] = best_rd - best_tx_rd[i];
3711     }
3712   } else {
3713     vp9_zero(best_filter_diff);
3714     vp9_zero(best_tx_diff);
3715   }
3716
3717   if (!x->in_active_map) {
3718     assert(mbmi->ref_frame[0] == LAST_FRAME);
3719     assert(mbmi->ref_frame[1] == NONE);
3720     assert(mbmi->mode == NEARESTMV ||
3721            mbmi->mode == NEARMV ||
3722            mbmi->mode == ZEROMV);
3723     assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
3724     assert(mbmi->mode == mbmi->uv_mode);
3725   }
3726
3727   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3728   store_coding_context(x, ctx, best_mode_index,
3729                        &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3730                        &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3731                                       mbmi->ref_frame[1]][0],
3732                        best_pred_diff, best_tx_diff, best_filter_diff);
3733
3734   return best_rd;
3735 }
3736
3737
/*
 * Rate-distortion mode search for a block that is coded with TX_4X4 and
 * per-sub-block modes (the "sub8x8" path).
 *
 * The function walks vp9_ref_order[] (one entry per reference-frame
 * combination), evaluates each surviving candidate and keeps the one with the
 * lowest RD cost that beats best_rd_so_far.
 *
 * Outputs:
 *  - *returnrate is set to INT_MAX up front and overwritten, together with
 *    *returndistortion, each time a candidate is accepted as the new best.
 *  - On success the winning mode info is copied into xd->mi[0] and recorded in
 *    ctx through store_coding_context().
 *
 * Returns the best RD cost found, or INT64_MAX when the search ends with
 * best_rd >= best_rd_so_far.
 */
3738 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3739                                       const TileInfo *const tile,
3740                                       int mi_row, int mi_col,
3741                                       int *returnrate,
3742                                       int64_t *returndistortion,
3743                                       BLOCK_SIZE bsize,
3744                                       PICK_MODE_CONTEXT *ctx,
3745                                       int64_t best_rd_so_far) {
3746   VP9_COMMON *const cm = &cpi->common;
3747   RD_OPT *const rd_opt = &cpi->rd;
3748   MACROBLOCKD *const xd = &x->e_mbd;
3749   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3750   const struct segmentation *const seg = &cm->seg;
3751   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3752   unsigned char segment_id = mbmi->segment_id;
3753   int comp_pred, i;
3754   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3755   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3756   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3757                                     VP9_ALT_FLAG };
3758   int64_t best_rd = best_rd_so_far;
3759   int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
       // Every candidate below is evaluated with TX_4X4, so the per-TX-mode
       // diffs handed to store_coding_context() are constant zeros.
3760   static const int64_t best_tx_diff[TX_MODES] = { 0 };
3761   int64_t best_pred_diff[REFERENCE_MODES];
3762   int64_t best_pred_rd[REFERENCE_MODES];
3763   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3764   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3765   MB_MODE_INFO best_mbmode = { 0 };
3766   int ref_index, best_ref_index = 0;
3767   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3768   vp9_prob comp_mode_p;
3769   int64_t best_inter_rd = INT64_MAX;
3770   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3771   INTERP_FILTER tmp_best_filter = SWITCHABLE;
3772   int rate_uv_intra, rate_uv_tokenonly;
3773   int64_t dist_uv;
3774   int skip_uv;
3775   PREDICTION_MODE mode_uv = DC_PRED;
       // Extra rate charged to the intra candidate: 20x vp9_dc_quant() at the
       // frame's base qindex.
3776   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3777   int_mv seg_mvs[4][MAX_REF_FRAMES];
3778   b_mode_info best_bmodes[4];
3779   int best_skip2 = 0;
       // NOTE(review): ref_frame_mask is filled in below but never read again
       // inside this function.
3780   int ref_frame_mask = 0;
3781   int mode_skip_mask = 0;
3782
3783   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
       // Clear the four per-4x4 zero-coefficient flags.
3784   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3785
       // No motion vector has been searched yet for any sub-block/reference.
3786   for (i = 0; i < 4; i++) {
3787     int j;
3788     for (j = 0; j < MAX_REF_FRAMES; j++)
3789       seg_mvs[i][j].as_int = INVALID_MV;
3790   }
3791
3792   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3793                            &comp_mode_p);
3794
3795   for (i = 0; i < REFERENCE_MODES; ++i)
3796     best_pred_rd[i] = INT64_MAX;
3797   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3798     best_filter_rd[i] = INT64_MAX;
       // INT_MAX marks "intra UV mode not chosen yet" (see the intra branch).
3799   rate_uv_intra = INT_MAX;
3800
3801   *returnrate = INT_MAX;
3802
       // Set up the prediction buffers and NEAREST/NEAR candidates for every
       // enabled reference frame; NEWMV starts out invalid and ZEROMV is zero.
3803   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3804     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3805       vp9_setup_buffer_inter(cpi, x, tile,
3806                              ref_frame, bsize, mi_row, mi_col,
3807                              frame_mv[NEARESTMV], frame_mv[NEARMV],
3808                              yv12_mb);
3809     }
3810     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3811     frame_mv[ZEROMV][ref_frame].as_int = 0;
3812   }
3813
       // With reference masking enabled, flag every reference whose halved
       // pred_mv_sad still exceeds the pred_mv_sad of some reference.
3814   for (ref_frame = LAST_FRAME;
3815        ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
         // NOTE(review): this 'i' shadows the function-scope 'i'.
3816     int i;
3817     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3818       if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
3819         ref_frame_mask |= (1 << ref_frame);
3820         break;
3821       }
3822     }
3823   }
3824
       // Main search loop: one iteration per entry of vp9_ref_order[].
3825   for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3826     int mode_excluded = 0;
3827     int64_t this_rd = INT64_MAX;
         // NOTE(review): disable_skip is never changed from 0 in this function.
3828     int disable_skip = 0;
3829     int compmode_cost = 0;
3830     int rate2 = 0, rate_y = 0, rate_uv = 0;
3831     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3832     int skippable = 0;
         // NOTE(review): this 'i' shadows the function-scope 'i'.
3833     int i;
3834     int this_skip2 = 0;
         // NOTE(review): an int64_t seeded with INT_MAX rather than INT64_MAX.
3835     int64_t total_sse = INT_MAX;
3836     int early_term = 0;
3837
3838     ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3839     second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3840
3841     // Look at the reference frame of the best mode so far and set the
3842     // skip mask to look at a subset of the remaining modes.
3843     if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
           // The mask is derived once, at ref_index 3, and reused afterwards.
3844       if (ref_index == 3) {
3845         switch (vp9_ref_order[best_ref_index].ref_frame[0]) {
3846           case INTRA_FRAME:
3847             mode_skip_mask = 0;
3848             break;
3849           case LAST_FRAME:
3850             mode_skip_mask = 0x0010;
3851             break;
3852           case GOLDEN_FRAME:
3853             mode_skip_mask = 0x0008;
3854             break;
3855           case ALTREF_FRAME:
3856             mode_skip_mask = 0x0000;
3857             break;
               // Neither value is expected as a first reference; both cases
               // fall into the assert.
3858           case NONE:
3859           case MAX_REF_FRAMES:
3860             assert(0 && "Invalid Reference frame");
3861         }
3862       }
3863       if (mode_skip_mask & (1 << ref_index))
3864         continue;
3865     }
3866
3867     // Test best rd so far against threshold for trying this mode.
3868     if (rd_less_than_thresh(best_rd,
3869                             rd_opt->threshes[segment_id][bsize][ref_index],
3870                             rd_opt->thresh_freq_fact[bsize][ref_index]))
3871       continue;
3872
         // Skip inter candidates whose first reference is not enabled.
3873     if (ref_frame > INTRA_FRAME &&
3874         !(cpi->ref_frame_flags & flag_list[ref_frame])) {
3875       continue;
3876     }
3877
3878     comp_pred = second_ref_frame > INTRA_FRAME;
3879     if (comp_pred) {
3880       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3881         continue;
3882       // Do not allow compound prediction if the segment level reference frame
3883       // feature is in use as in this case there can only be one reference.
3884       if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3885         continue;
3886       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3887           vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME)
3888         continue;
3889       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3890           ref_frame != best_inter_ref_frame &&
3891           second_ref_frame != best_inter_ref_frame)
3892         continue;
3893     }
3894
3895     // TODO(jingning, jkoleszar): scaling reference frame not supported for
3896     // sub8x8 blocks.
3897     if (ref_frame > INTRA_FRAME &&
3898         vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3899       continue;
3900
3901     if (second_ref_frame > INTRA_FRAME &&
3902         vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3903       continue;
3904
         // mode_excluded is still 0 at this point, so these assignments simply
         // record whether the frame-level reference_mode rules out this kind of
         // prediction (compound vs. single).
3905     if (comp_pred) {
3906       mode_excluded = mode_excluded ? mode_excluded
3907                                     : cm->reference_mode == SINGLE_REFERENCE;
3908     } else if (ref_frame != INTRA_FRAME) {
3909       mode_excluded = mode_excluded ? mode_excluded
3910                                     : cm->reference_mode == COMPOUND_REFERENCE;
3911     }
3912
3913     // If the segment reference frame feature is enabled....
3914     // then do nothing if the current ref frame is not allowed..
3915     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3916         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3917             (int)ref_frame) {
3918       continue;
3919     // If the segment skip feature is enabled....
3920     // then do nothing if the current mode is not allowed..
3921     } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3922                ref_frame != INTRA_FRAME) {
3923       continue;
3924     // Disable this drop out case if the ref frame
3925     // segment level feature is enabled for this segment. This is to
3926     // prevent the possibility that we end up unable to pick any mode.
3927     } else if (!vp9_segfeature_active(seg, segment_id,
3928                                       SEG_LVL_REF_FRAME)) {
3929       // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3930       // unless ARNR filtering is enabled in which case we want
3931       // an unfiltered alternative. We allow near/nearest as well
3932       // because they may result in zero-zero MVs but be cheaper.
           // NOTE(review): as written, every candidate (regardless of
           // reference or mode) is skipped when is_src_frame_alt_ref is set
           // and arnr_max_frames is 0; the comment above does not match this
           // unconditional skip - confirm the intended behaviour.
3933       if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3934         continue;
3935     }
3936
         // Prime the mode info for this candidate.
3937     mbmi->tx_size = TX_4X4;
3938     mbmi->uv_mode = DC_PRED;
3939     mbmi->ref_frame[0] = ref_frame;
3940     mbmi->ref_frame[1] = second_ref_frame;
3941     // Evaluate all sub-pel filters irrespective of whether we can use
3942     // them for this frame.
3943     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3944                                                           : cm->interp_filter;
3945     x->skip = 0;
3946     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3947
3948     // Select prediction reference frames.
3949     for (i = 0; i < MAX_MB_PLANE; i++) {
3950       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3951       if (comp_pred)
3952         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3953     }
3954
3955 #ifdef MODE_TEST_HIT_STATS
3956     // TEST/DEBUG CODE
3957     // Keep a record of the number of test hits at each size
3958     cpi->mode_test_hits[bsize]++;
3959 #endif
3960
3961     if (ref_frame == INTRA_FRAME) {
           // Intra candidate: search the per-sub-block luma modes and drop the
           // candidate if the result does not beat best_rd.
3962       int rate;
3963       if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3964                                        &distortion_y, best_rd) >= best_rd)
3965         continue;
3966       rate2 += rate;
3967       rate2 += intra_cost_penalty;
3968       distortion2 += distortion_y;
3969
           // The UV intra mode is chosen only while rate_uv_intra still holds
           // its INT_MAX "not computed" marker.
3970       if (rate_uv_intra == INT_MAX) {
3971         choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
3972                              &rate_uv_intra,
3973                              &rate_uv_tokenonly,
3974                              &dist_uv, &skip_uv,
3975                              &mode_uv);
3976       }
3977       rate2 += rate_uv_intra;
3978       rate_uv = rate_uv_tokenonly;
3979       distortion2 += dist_uv;
3980       distortion_uv = dist_uv;
3981       mbmi->uv_mode = mode_uv;
3982     } else {
           // Inter candidate (single or compound reference).
3983       int rate;
3984       int64_t distortion;
3985       int64_t this_rd_thresh;
3986       int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3987       int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3988       int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3989       int tmp_best_skippable = 0;
3990       int switchable_filter_index;
3991       int_mv *second_ref = comp_pred ?
3992                              &mbmi->ref_mvs[second_ref_frame][0] : NULL;
           // Only entries 0..3 of tmp_best_bmodes are used below.
3993       b_mode_info tmp_best_bmodes[16];
3994       MB_MODE_INFO tmp_best_mbmode;
3995       BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
           // Set once a filter-search result has been snapshotted into the
           // tmp_best_* variables.
3996       int pred_exists = 0;
3997       int uv_skippable;
3998
           // Threshold keyed on the first reference: THR_LAST for LAST_FRAME,
           // THR_GOLD for GOLDEN_FRAME, THR_ALTR otherwise.
3999       this_rd_thresh = (ref_frame == LAST_FRAME) ?
4000           rd_opt->threshes[segment_id][bsize][THR_LAST] :
4001           rd_opt->threshes[segment_id][bsize][THR_ALTR];
4002       this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4003       rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
           // Reset the per-filter RD cache consumed by the "best filter type"
           // bookkeeping further down.
4004       rd_opt->mask_filter = 0;
4005       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4006         rd_opt->filter_cache[i] = INT64_MAX;
4007
           // Pick the interpolation filter: EIGHTTAP for low source variance,
           // the filter predicted in ctx when the adaptive speed feature allows
           // it, or otherwise the best of an exhaustive search over all
           // switchable filters.
4008       if (cm->interp_filter != BILINEAR) {
4009         tmp_best_filter = EIGHTTAP;
4010         if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
4011           tmp_best_filter = EIGHTTAP;
4012         } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
4013                    ctx->pred_interp_filter < SWITCHABLE) {
4014           tmp_best_filter = ctx->pred_interp_filter;
4015         } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
4016           tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4017                               ctx->pred_interp_filter : 0;
4018         } else {
4019           for (switchable_filter_index = 0;
4020                switchable_filter_index < SWITCHABLE_FILTERS;
4021                ++switchable_filter_index) {
4022             int newbest, rs;
4023             int64_t rs_rd;
4024             mbmi->interp_filter = switchable_filter_index;
4025             tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
4026                                               &mbmi->ref_mvs[ref_frame][0],
4027                                               second_ref, best_yrd, &rate,
4028                                               &rate_y, &distortion,
4029                                               &skippable, &total_sse,
4030                                               (int) this_rd_thresh, seg_mvs,
4031                                               bsi, switchable_filter_index,
4032                                               mi_row, mi_col);
4033
4034             if (tmp_rd == INT64_MAX)
4035               continue;
                 // rs_rd is the RD cost of the switchable-filter rate alone
                 // (zero distortion).
4036             rs = vp9_get_switchable_rate(cpi);
4037             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4038             rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
4039             rd_opt->filter_cache[SWITCHABLE_FILTERS] =
4040                 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
4041                     tmp_rd + rs_rd);
4042             if (cm->interp_filter == SWITCHABLE)
4043               tmp_rd += rs_rd;
4044
4045             rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
4046
4047             newbest = (tmp_rd < tmp_best_rd);
4048             if (newbest) {
4049               tmp_best_filter = mbmi->interp_filter;
4050               tmp_best_rd = tmp_rd;
4051             }
                 // Snapshot this result when it is the new best under a
                 // switchable frame filter, or when it uses the fixed
                 // frame-level filter.
4052             if ((newbest && cm->interp_filter == SWITCHABLE) ||
4053                 (mbmi->interp_filter == cm->interp_filter &&
4054                  cm->interp_filter != SWITCHABLE)) {
4055               tmp_best_rdu = tmp_rd;
4056               tmp_best_rate = rate;
4057               tmp_best_ratey = rate_y;
4058               tmp_best_distortion = distortion;
4059               tmp_best_sse = total_sse;
4060               tmp_best_skippable = skippable;
4061               tmp_best_mbmode = *mbmi;
4062               for (i = 0; i < 4; i++) {
4063                 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
4064                 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4065               }
4066               pred_exists = 1;
4067               if (switchable_filter_index == 0 &&
4068                   cpi->sf.use_rd_breakout &&
4069                   best_rd < INT64_MAX) {
4070                 if (tmp_best_rdu / 2 > best_rd) {
4071                   // skip searching the other filters if the first is
4072                   // already substantially larger than the best so far
4073                   tmp_best_filter = mbmi->interp_filter;
4074                   tmp_best_rdu = INT64_MAX;
4075                   break;
4076                 }
4077               }
4078             }
4079           }  // switchable_filter_index loop
4080         }
4081       }
4082
           // A snapshot exists but its cost was reset to INT64_MAX: this is the
           // rd-breakout case above, so give up on this reference.
4083       if (tmp_best_rdu == INT64_MAX && pred_exists)
4084         continue;
4085
4086       mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
4087                              tmp_best_filter : cm->interp_filter);
4088       if (!pred_exists) {
4089         // Handles the special case when a filter that is not in the
4090         // switchable list (bilinear, 6-tap) is indicated at the frame level
4091         tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
4092                                           &mbmi->ref_mvs[ref_frame][0],
4093                                           second_ref, best_yrd, &rate, &rate_y,
4094                                           &distortion, &skippable, &total_sse,
4095                                           (int) this_rd_thresh, seg_mvs, bsi, 0,
4096                                           mi_row, mi_col);
4097         if (tmp_rd == INT64_MAX)
4098           continue;
4099       } else {
             // Restore the snapshot taken during the filter search.
4100         total_sse = tmp_best_sse;
4101         rate = tmp_best_rate;
4102         rate_y = tmp_best_ratey;
4103         distortion = tmp_best_distortion;
4104         skippable = tmp_best_skippable;
4105         *mbmi = tmp_best_mbmode;
4106         for (i = 0; i < 4; i++)
4107           xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
4108       }
4109
4110       rate2 += rate;
4111       distortion2 += distortion;
4112
4113       if (cm->interp_filter == SWITCHABLE)
4114         rate2 += vp9_get_switchable_rate(cpi);
4115
4116       if (!mode_excluded)
4117         mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4118                                   : cm->reference_mode == COMPOUND_REFERENCE;
4119
4120       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4121
           // tmp_best_rdu is reused here as the RD budget left for chroma:
           // best_rd minus the cheaper of the cost so far and the SSE-only
           // (zero-rate) cost.
4122       tmp_best_rdu = best_rd -
4123           MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4124               RDCOST(x->rdmult, x->rddiv, 0, total_sse));
4125
4126       if (tmp_best_rdu > 0) {
4127         // If even the 'Y' rd value of split is higher than best so far
4128         // then don't bother looking at UV
4129         vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4130                                         BLOCK_8X8);
4131         super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4132                          &uv_sse, BLOCK_8X8, tmp_best_rdu);
             // rate_uv == INT_MAX is treated as "no usable chroma result";
             // presumably super_block_uvrd() reports this when the budget is
             // exceeded - confirm against its definition.
4133         if (rate_uv == INT_MAX)
4134           continue;
4135         rate2 += rate_uv;
4136         distortion2 += distortion_uv;
4137         skippable = skippable && uv_skippable;
4138         total_sse += uv_sse;
4139       }
4140     }
4141
4142     if (cm->reference_mode == REFERENCE_MODE_SELECT)
4143       rate2 += compmode_cost;
4144
4145     // Estimate the reference frame signaling cost and add it
4146     // to the rolling cost variable.
4147     if (second_ref_frame > INTRA_FRAME) {
4148       rate2 += ref_costs_comp[ref_frame];
4149     } else {
4150       rate2 += ref_costs_single[ref_frame];
4151     }
4152
4153     if (!disable_skip) {
4154       // Test for the condition where skip block will be activated
4155       // because there are no non zero coefficients and make any
4156       // necessary adjustment for rate. Ignore if skip is coded at
4157       // segment level as the cost won't have been added in.
4158       // Is Mb level skip allowed (i.e. not coded at segment level).
4159       const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4160                                                          SEG_LVL_SKIP);
4161
4162       if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
             // Compare coding the residual against skipping it outright.
4163         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4164             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4165           // Add in the cost of the no skip flag.
4166           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4167         } else {
4168           // FIXME(rbultje) make this work for splitmv also
               // Skipping is cheaper: charge the skip flag, drop the
               // coefficient rate and take the raw SSE as distortion.
4169           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4170           distortion2 = total_sse;
4171           assert(total_sse >= 0);
4172           rate2 -= (rate_y + rate_uv);
4173           rate_y = 0;
4174           rate_uv = 0;
4175           this_skip2 = 1;
4176         }
4177       } else if (mb_skip_allowed) {
4178         // Add in the cost of the no skip flag.
4179         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4180       }
4181
4182       // Calculate the final RD estimate for this mode.
4183       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4184     }
4185
4186     // Keep record of best inter rd with single reference
4187     if (is_inter_block(mbmi) &&
4188         !has_second_ref(mbmi) &&
4189         !mode_excluded &&
4190         this_rd < best_inter_rd) {
4191       best_inter_rd = this_rd;
4192       best_inter_ref_frame = ref_frame;
4193     }
4194
         // The intra candidate competes in every reference-mode and filter
         // category.
4195     if (!disable_skip && ref_frame == INTRA_FRAME) {
4196       for (i = 0; i < REFERENCE_MODES; ++i)
4197         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4198       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4199         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4200     }
4201
4202     // Did this mode help.. i.e. is it the new best mode
4203     if (this_rd < best_rd || x->skip) {
4204       if (!mode_excluded) {
4205         int max_plane = MAX_MB_PLANE;
4206         // Note index of best mode so far
4207         best_ref_index = ref_index;
4208
4209         if (ref_frame == INTRA_FRAME) {
4210           /* required for left and above block mv */
4211           mbmi->mv[0].as_int = 0;
4212           max_plane = 1;
4213         }
4214
4215         *returnrate = rate2;
4216         *returndistortion = distortion2;
4217         best_rd = this_rd;
             // Luma-only share of the best cost: total minus the chroma RD
             // cost.
4218         best_yrd = best_rd -
4219                    RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4220         best_mbmode = *mbmi;
4221         best_skip2 = this_skip2;
4222         if (!x->select_txfm_size)
4223           swap_block_ptr(x, ctx, max_plane);
4224         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
4225                    sizeof(uint8_t) * ctx->num_4x4_blk);
4226
4227         for (i = 0; i < 4; i++)
4228           best_bmodes[i] = xd->mi[0]->bmi[i];
4229
4230         // TODO(debargha): enhance this test with a better distortion prediction
4231         // based on qp, activity mask and history
             // Early termination: stop the search once an inter candidate's
             // distortion times 'scale' drops below qstep squared. scale is 4,
             // or 3 when the source variance is below 16.
4232         if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4233             (ref_index > MIN_EARLY_TERM_INDEX)) {
4234           const int qstep = xd->plane[0].dequant[1];
4235           // TODO(debargha): Enhance this by specializing for each mode_index
4236           int scale = 4;
4237           if (x->source_variance < UINT_MAX) {
4238             const int var_adjust = (x->source_variance < 16);
4239             scale -= var_adjust;
4240           }
4241           if (ref_frame > INTRA_FRAME &&
4242               distortion2 * scale < qstep * qstep) {
4243             early_term = 1;
4244           }
4245         }
4246       }
4247     }
4248
4249     /* keep record of best compound/single-only prediction */
4250     if (!disable_skip && ref_frame != INTRA_FRAME) {
4251       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4252
           // rate2 includes compmode_cost only under REFERENCE_MODE_SELECT;
           // derive both the "without" and "with" variants from it.
4253       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4254         single_rate = rate2 - compmode_cost;
4255         hybrid_rate = rate2;
4256       } else {
4257         single_rate = rate2;
4258         hybrid_rate = rate2 + compmode_cost;
4259       }
4260
4261       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4262       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4263
4264       if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4265         best_pred_rd[SINGLE_REFERENCE] = single_rd;
4266       } else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4267         best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4268       }
4269       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4270         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4271     }
4272
4273     /* keep record of best filter type */
4274     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4275         cm->interp_filter != BILINEAR) {
           // 'ref' is the cached cost of the filter setting actually in use;
           // every other entry is expressed relative to it.
4276       int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
4277                               SWITCHABLE_FILTERS : cm->interp_filter];
4278       int64_t adj_rd;
4279       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4280         if (ref == INT64_MAX)
4281           adj_rd = 0;
4282         else if (rd_opt->filter_cache[i] == INT64_MAX)
4283           // when early termination is triggered, the encoder does not have
4284           // access to the rate-distortion cost. it only knows that the cost
4285           // should be above the maximum valid value. hence it takes the known
4286           // maximum plus an arbitrary constant as the rate-distortion cost.
4287           adj_rd = rd_opt->mask_filter - ref + 10;
4288         else
4289           adj_rd = rd_opt->filter_cache[i] - ref;
4290
4291         adj_rd += this_rd;
4292         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4293       }
4294     }
4295
4296     if (early_term)
4297       break;
4298
         // A skipped single-reference candidate also ends the search.
4299     if (x->skip && !comp_pred)
4300       break;
4301   }
4302
       // Nothing beat the bound supplied by the caller.
4303   if (best_rd >= best_rd_so_far)
4304     return INT64_MAX;
4305
4306   // If we used an estimate for the uv intra rd in the loop above...
4307   if (cpi->sf.use_uv_intra_rd_estimate) {
4308     // Do Intra UV best rd mode selection if best mode choice above was intra.
4309     if (vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME) {
4310       *mbmi = best_mbmode;
4311       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
4312                               &rate_uv_tokenonly,
4313                               &dist_uv,
4314                               &skip_uv,
4315                               BLOCK_8X8, TX_4X4);
4316     }
4317   }
4318
       // NOTE(review): this branch cannot be taken - the check above already
       // returned unless best_rd < best_rd_so_far, which rules out INT64_MAX.
4319   if (best_rd == INT64_MAX) {
4320     *returnrate = INT_MAX;
4321     *returndistortion = INT64_MAX;
4322     return best_rd;
4323   }
4324
4325   assert((cm->interp_filter == SWITCHABLE) ||
4326          (cm->interp_filter == best_mbmode.interp_filter) ||
4327          !is_inter_block(&best_mbmode));
4328
4329   update_rd_thresh_fact(cpi, bsize, best_ref_index);
4330
4331   // macroblock modes
4332   *mbmi = best_mbmode;
4333   x->skip |= best_skip2;
4334   if (!is_inter_block(&best_mbmode)) {
4335     for (i = 0; i < 4; i++)
4336       xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4337   } else {
4338     for (i = 0; i < 4; ++i)
4339       vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4340
         // The block-level MVs mirror those of the last sub-block (index 3).
4341     mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
4342     mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
4343   }
4344
       // Express the per-category bests relative to the overall best;
       // categories that were never evaluated get a sentinel (INT_MIN here,
       // 0 for the filter diffs below).
4345   for (i = 0; i < REFERENCE_MODES; ++i) {
4346     if (best_pred_rd[i] == INT64_MAX)
4347       best_pred_diff[i] = INT_MIN;
4348     else
4349       best_pred_diff[i] = best_rd - best_pred_rd[i];
4350   }
4351
4352   if (!x->skip) {
4353     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4354       if (best_filter_rd[i] == INT64_MAX)
4355         best_filter_diff[i] = 0;
4356       else
4357         best_filter_diff[i] = best_rd - best_filter_rd[i];
4358     }
4359     if (cm->interp_filter == SWITCHABLE)
4360       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4361   } else {
4362     vp9_zero(best_filter_diff);
4363   }
4364
       // Publish the winning mode to the coding context.
4365   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4366   store_coding_context(x, ctx, best_ref_index,
4367                        &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4368                        &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4369                                       mbmi->ref_frame[1]][0],
4370                        best_pred_diff, best_tx_diff, best_filter_diff);
4371
4372   return best_rd;
4373 }
4374
4375 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
4376   int i;
4377   RD_OPT *const rd = &cpi->rd;
4378
4379   // Set baseline threshold values
4380   for (i = 0; i < MAX_MODES; ++i)
4381     rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
4382
4383   rd->thresh_mult[THR_NEARESTMV] = 0;
4384   rd->thresh_mult[THR_NEARESTG] = 0;
4385   rd->thresh_mult[THR_NEARESTA] = 0;
4386
4387   rd->thresh_mult[THR_DC] += 1000;
4388
4389   rd->thresh_mult[THR_NEWMV] += 1000;
4390   rd->thresh_mult[THR_NEWA] += 1000;
4391   rd->thresh_mult[THR_NEWG] += 1000;
4392
4393   rd->thresh_mult[THR_NEARMV] += 1000;
4394   rd->thresh_mult[THR_NEARA] += 1000;
4395   rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
4396   rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
4397
4398   rd->thresh_mult[THR_TM] += 1000;
4399
4400   rd->thresh_mult[THR_COMP_NEARLA] += 1500;
4401   rd->thresh_mult[THR_COMP_NEWLA] += 2000;
4402   rd->thresh_mult[THR_NEARG] += 1000;
4403   rd->thresh_mult[THR_COMP_NEARGA] += 1500;
4404   rd->thresh_mult[THR_COMP_NEWGA] += 2000;
4405
4406   rd->thresh_mult[THR_ZEROMV] += 2000;
4407   rd->thresh_mult[THR_ZEROG] += 2000;
4408   rd->thresh_mult[THR_ZEROA] += 2000;
4409   rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
4410   rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
4411
4412   rd->thresh_mult[THR_H_PRED] += 2000;
4413   rd->thresh_mult[THR_V_PRED] += 2000;
4414   rd->thresh_mult[THR_D45_PRED ] += 2500;
4415   rd->thresh_mult[THR_D135_PRED] += 2500;
4416   rd->thresh_mult[THR_D117_PRED] += 2500;
4417   rd->thresh_mult[THR_D153_PRED] += 2500;
4418   rd->thresh_mult[THR_D207_PRED] += 2500;
4419   rd->thresh_mult[THR_D63_PRED] += 2500;
4420
4421   /* disable frame modes if flags not set */
4422   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
4423     rd->thresh_mult[THR_NEWMV    ] = INT_MAX;
4424     rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
4425     rd->thresh_mult[THR_ZEROMV   ] = INT_MAX;
4426     rd->thresh_mult[THR_NEARMV   ] = INT_MAX;
4427   }
4428   if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
4429     rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
4430     rd->thresh_mult[THR_ZEROG    ] = INT_MAX;
4431     rd->thresh_mult[THR_NEARG    ] = INT_MAX;
4432     rd->thresh_mult[THR_NEWG     ] = INT_MAX;
4433   }
4434   if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
4435     rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
4436     rd->thresh_mult[THR_ZEROA    ] = INT_MAX;
4437     rd->thresh_mult[THR_NEARA    ] = INT_MAX;
4438     rd->thresh_mult[THR_NEWA     ] = INT_MAX;
4439   }
4440
4441   if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
4442       (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
4443     rd->thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
4444     rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
4445     rd->thresh_mult[THR_COMP_NEARLA   ] = INT_MAX;
4446     rd->thresh_mult[THR_COMP_NEWLA    ] = INT_MAX;
4447   }
4448   if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
4449       (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
4450     rd->thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
4451     rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
4452     rd->thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
4453     rd->thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
4454   }
4455 }
4456
4457 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
4458   const SPEED_FEATURES *const sf = &cpi->sf;
4459   RD_OPT *const rd = &cpi->rd;
4460   int i;
4461
4462   for (i = 0; i < MAX_REFS; ++i)
4463     rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode)  ? -500 : 0;
4464
4465   rd->thresh_mult_sub8x8[THR_LAST] += 2500;
4466   rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
4467   rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
4468   rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
4469   rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
4470   rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
4471
4472   // Check for masked out split cases.
4473   for (i = 0; i < MAX_REFS; i++)
4474     if (sf->disable_split_mask & (1 << i))
4475       rd->thresh_mult_sub8x8[i] = INT_MAX;
4476
4477   // disable mode test if frame flag is not set
4478   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
4479     rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
4480   if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
4481     rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
4482   if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
4483     rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
4484   if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
4485       (VP9_LAST_FLAG | VP9_ALT_FLAG))
4486     rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
4487   if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
4488       (VP9_GOLD_FLAG | VP9_ALT_FLAG))
4489     rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
4490 }