granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_rdopt.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <math.h>
  13
  14 #include "./vp9_rtcd.h"
  15
  16 #include "vpx_mem/vpx_mem.h"
  17
  18 #include "vp9/common/vp9_common.h"
  19 #include "vp9/common/vp9_entropy.h"
  20 #include "vp9/common/vp9_entropymode.h"
  21 #include "vp9/common/vp9_idct.h"
  22 #include "vp9/common/vp9_mvref_common.h"
  23 #include "vp9/common/vp9_pred_common.h"
  24 #include "vp9/common/vp9_quant_common.h"
  25 #include "vp9/common/vp9_reconinter.h"
  26 #include "vp9/common/vp9_reconintra.h"
  27 #include "vp9/common/vp9_seg_common.h"
  28 #include "vp9/common/vp9_systemdependent.h"
  29
  30 #include "vp9/encoder/vp9_cost.h"
  31 #include "vp9/encoder/vp9_encodemb.h"
  32 #include "vp9/encoder/vp9_encodemv.h"
  33 #include "vp9/encoder/vp9_encoder.h"
  34 #include "vp9/encoder/vp9_mcomp.h"
  35 #include "vp9/encoder/vp9_quantize.h"
  36 #include "vp9/encoder/vp9_ratectrl.h"
  37 #include "vp9/encoder/vp9_rd.h"
  38 #include "vp9/encoder/vp9_rdopt.h"
  39 #include "vp9/encoder/vp9_variance.h"
  40
// Constants for RD threshold adaptation.
// NOTE(review): neither is referenced in this part of the file; confirm their
// exact role against the code that updates the thresholds.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1

// Bit masks with one bit per MV_REFERENCE_FRAME value. Each mask sets the
// bits of INTRA_FRAME and of the two inter references OTHER than the one the
// mask is named after.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Mask with the ALTREF_FRAME bit and bit 0 set.
// NOTE(review): presumably describes the allowed second reference of a
// compound pair -- confirm at the point of use.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

// NOTE(review): not referenced in this part of the file; presumably the
// first mode index at which early termination is considered -- confirm.
#define MIN_EARLY_TERM_INDEX    3
  54
// One entry of vp9_mode_order[]: a prediction mode together with the pair of
// reference frames it is evaluated with. In that table the second reference
// is NONE for every single-reference and intra entry.
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
  59
// One entry of vp9_ref_order[]: a pair of reference frames with no
// prediction mode attached.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
  63
// State shared between txfm_rd_in_plane() and the per-transform-block
// callback block_rd_txfm() (and its helpers dist_block() / rate_block()).
struct rdcost_block_args {
  MACROBLOCK *x;
  // Entropy contexts handed to cost_coeffs(); rate_block() indexes them with
  // the x/y offsets returned by txfrm_block_to_raster_xy().
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  // Results for the transform block processed most recently.
  int rate;
  int64_t dist;
  int64_t sse;
  // Running totals over all transform blocks visited so far.
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  // Budget: block_rd_txfm() sets `skip` once this_rd exceeds best_rd.
  int64_t best_rd;
  // Non-zero makes block_rd_txfm() return immediately for remaining blocks.
  int skip;
  // Forwarded unchanged to cost_coeffs().
  int use_fast_coef_costing;
  // Scan order whose scan / neighbors tables are passed to cost_coeffs().
  const scan_order *so;
};
  80
// Table of (prediction mode, reference frame pair) combinations.
// Single-reference and intra entries use NONE as the second reference;
// compound entries pair LAST_FRAME or GOLDEN_FRAME with ALTREF_FRAME.
// NOTE(review): presumably the order in which the mode search visits the
// candidates -- the search loop is not in this part of the file; confirm.
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};
 122
// Table of reference frame pairs: the three single references, the two
// compound pairs (each with ALTREF_FRAME as second reference), then intra.
// NOTE(review): not referenced in this part of the file; presumably the
// visiting order of a reference-only search -- confirm at the point of use.
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};
 131
 132 static int raster_block_offset(BLOCK_SIZE plane_bsize,
 133                                int raster_block, int stride) {
 134   const int bw = b_width_log2(plane_bsize);
 135   const int y = 4 * (raster_block >> bw);
 136   const int x = 4 * (raster_block & ((1 << bw) - 1));
 137   return y * stride + x;
 138 }
 139 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
 140                                           int raster_block, int16_t *base) {
 141   const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 142   return base + raster_block_offset(plane_bsize, raster_block, stride);
 143 }
 144
 145 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
 146                            int m, int n, int min_plane, int max_plane) {
 147   int i;
 148
 149   for (i = min_plane; i < max_plane; ++i) {
 150     struct macroblock_plane *const p = &x->plane[i];
 151     struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
 152
 153     p->coeff    = ctx->coeff_pbuf[i][m];
 154     p->qcoeff   = ctx->qcoeff_pbuf[i][m];
 155     pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
 156     p->eobs     = ctx->eobs_pbuf[i][m];
 157
 158     ctx->coeff_pbuf[i][m]   = ctx->coeff_pbuf[i][n];
 159     ctx->qcoeff_pbuf[i][m]  = ctx->qcoeff_pbuf[i][n];
 160     ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
 161     ctx->eobs_pbuf[i][m]    = ctx->eobs_pbuf[i][n];
 162
 163     ctx->coeff_pbuf[i][n]   = p->coeff;
 164     ctx->qcoeff_pbuf[i][n]  = p->qcoeff;
 165     ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
 166     ctx->eobs_pbuf[i][n]    = p->eobs;
 167   }
 168 }
 169
 170 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
 171                             MACROBLOCK *x, MACROBLOCKD *xd,
 172                             int *out_rate_sum, int64_t *out_dist_sum) {
 173   // Note our transform coeffs are 8 times an orthogonal transform.
 174   // Hence quantizer step is also 8 times. To get effective quantizer
 175   // we need to divide by 8 before sending to modeling function.
 176   int i;
 177   int64_t rate_sum = 0;
 178   int64_t dist_sum = 0;
 179   const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
 180   unsigned int sse;
 181   unsigned int var = 0;
 182   unsigned int sum_sse = 0;
 183   const int shift = 8;
 184   int rate;
 185   int64_t dist;
 186
 187   x->pred_sse[ref] = 0;
 188
 189   for (i = 0; i < MAX_MB_PLANE; ++i) {
 190     struct macroblock_plane *const p = &x->plane[i];
 191     struct macroblockd_plane *const pd = &xd->plane[i];
 192     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
 193     const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 194     const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
 195     int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
 196     int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
 197     int idx, idy;
 198     int lw = b_width_log2_lookup[unit_size] + 2;
 199     int lh = b_height_log2_lookup[unit_size] + 2;
 200
 201     sum_sse = 0;
 202
 203     for (idy = 0; idy < bh; ++idy) {
 204       for (idx = 0; idx < bw; ++idx) {
 205         uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
 206         uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
 207         int block_idx = (idy << 1) + idx;
 208
 209         var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
 210                                         dst, pd->dst.stride, &sse);
 211         x->bsse[(i << 2) + block_idx] = sse;
 212         sum_sse += sse;
 213
 214         if (!x->select_tx_size) {
 215           if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)
 216             x->skip_txfm[(i << 2) + block_idx] = 1;
 217           else if (var < p->quant_thred[1] >> shift)
 218             x->skip_txfm[(i << 2) + block_idx] = 2;
 219           else
 220             x->skip_txfm[(i << 2) + block_idx] = 0;
 221         }
 222
 223         if (i == 0)
 224           x->pred_sse[ref] += sse;
 225       }
 226     }
 227
 228     // Fast approximate the modelling function.
 229     if (cpi->oxcf.speed > 4) {
 230       int64_t rate;
 231       int64_t dist;
 232       int64_t square_error = sse;
 233       int quantizer = (pd->dequant[1] >> 3);
 234
 235       if (quantizer < 120)
 236         rate = (square_error * (280 - quantizer)) >> 8;
 237       else
 238         rate = 0;
 239       dist = (square_error * quantizer) >> 8;
 240       rate_sum += rate;
 241       dist_sum += dist;
 242     } else {
 243       vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
 244                                    pd->dequant[1] >> 3, &rate, &dist);
 245       rate_sum += rate;
 246       dist_sum += dist;
 247     }
 248   }
 249
 250   *out_rate_sum = (int)rate_sum;
 251   *out_dist_sum = dist_sum << 4;
 252 }
 253
 254 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
 255                           intptr_t block_size, int64_t *ssz) {
 256   int i;
 257   int64_t error = 0, sqcoeff = 0;
 258
 259   for (i = 0; i < block_size; i++) {
 260     const int diff = coeff[i] - dqcoeff[i];
 261     error +=  diff * diff;
 262     sqcoeff += coeff[i] * coeff[i];
 263   }
 264
 265   *ssz = sqcoeff;
 266   return error;
 267 }
 268
/* Number of coefficients in each coefficient band, per transform size. The
 * six counts of a row add up to the number of coefficients in the transform
 * (16, 64, 256, 1024). cost_coeffs() handles the first band (one
 * coefficient, the DC) separately and starts reading at index 1.
 *
 * The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). Each row is declared with 8 entries but lists 7, so the
 * last entry is implicitly zero as well. */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
// Returns the estimated cost of coding the quantized coefficients of one
// transform block, using the token cost tables in x->token_costs.
//
//   x, plane, block  select the quantized coefficients (p->qcoeff) and the
//                    end-of-block position (p->eobs[block]).
//   A, L             entropy contexts for this block. They are combined to
//                    form the context of the first token, and both are
//                    overwritten with 1 if at least one coefficient was
//                    costed (eob > 0), else 0.
//   tx_size          transform size; selects the cost table and band counts.
//   scan, nb         scan order and neighbor table; nb is only consulted via
//                    get_coef_context() when use_fast_coef_costing is zero.
//   use_fast_coef_costing
//                    when non-zero the context of each AC token and of the
//                    final EOB token is approximated by !prev_t instead of
//                    being derived from neighboring tokens.
static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Starts at index 1: the first band (the DC coefficient) is costed
  // separately below.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  // Points at the cost table of the current band; advanced with
  // ++token_costs each time a band is exhausted.
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      // The first table index is !prev_t, i.e. 1 only when the previous
      // token value was 0.
      // NOTE(review): presumably this selects the cost variant that applies
      // right after a zero token -- confirm against the table construction.
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      // Band exhausted: load the next band's count and cost table. The
      // count becomes 0 (the terminator) after the last band.
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    // Only costed while band_left is non-zero, i.e. when the block ended
    // before its last coefficient position.
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // Both contexts record whether the block had any coded coefficient
  // (c > 0 exactly when eob > 0).
  *A = *L = (c > 0);

  return cost;
}
 354 static void dist_block(int plane, int block, TX_SIZE tx_size,
 355                        struct rdcost_block_args* args) {
 356   const int ss_txfrm_size = tx_size << 1;
 357   MACROBLOCK* const x = args->x;
 358   MACROBLOCKD* const xd = &x->e_mbd;
 359   const struct macroblock_plane *const p = &x->plane[plane];
 360   const struct macroblockd_plane *const pd = &xd->plane[plane];
 361   int64_t this_sse;
 362   int shift = tx_size == TX_32X32 ? 0 : 2;
 363   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 364   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 365   args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
 366                                &this_sse) >> shift;
 367   args->sse  = this_sse >> shift;
 368
 369   if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
 370     // TODO(jingning): tune the model to better capture the distortion.
 371     int64_t p = (pd->dequant[1] * pd->dequant[1] *
 372                     (1 << ss_txfrm_size)) >> (shift + 2);
 373     args->dist += (p >> 4);
 374     args->sse  += p;
 375   }
 376 }
 377
 378 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
 379                        TX_SIZE tx_size, struct rdcost_block_args* args) {
 380   int x_idx, y_idx;
 381   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
 382
 383   args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
 384                            args->t_left + y_idx, tx_size,
 385                            args->so->scan, args->so->neighbors,
 386                            args->use_fast_coef_costing);
 387 }
 388
 389 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
 390                           TX_SIZE tx_size, void *arg) {
 391   struct rdcost_block_args *args = arg;
 392   MACROBLOCK *const x = args->x;
 393   MACROBLOCKD *const xd = &x->e_mbd;
 394   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
 395   int64_t rd1, rd2, rd;
 396
 397   if (args->skip)
 398     return;
 399
 400   if (!is_inter_block(mbmi)) {
 401     vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
 402     dist_block(plane, block, tx_size, args);
 403   } else if (max_txsize_lookup[plane_bsize] == tx_size) {
 404     if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
 405       // full forward transform and quantization
 406       vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 407       dist_block(plane, block, tx_size, args);
 408     } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
 409       // compute DC coefficient
 410       tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
 411       tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
 412       vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
 413       args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
 414       args->dist = args->sse;
 415       if (!x->plane[plane].eobs[block])
 416         args->dist = args->sse - ((coeff[0] * coeff[0] -
 417             (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);
 418     } else {
 419       // skip forward transform
 420       x->plane[plane].eobs[block] = 0;
 421       args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
 422       args->dist = args->sse;
 423     }
 424   } else {
 425     // full forward transform and quantization
 426     vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 427     dist_block(plane, block, tx_size, args);
 428   }
 429
 430   rate_block(plane, block, plane_bsize, tx_size, args);
 431   rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
 432   rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
 433
 434   // TODO(jingning): temporarily enabled only for luma component
 435   rd = MIN(rd1, rd2);
 436   if (plane == 0)
 437     x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
 438                                     (rd1 > rd2 && !xd->lossless);
 439
 440   args->this_rate += args->rate;
 441   args->this_dist += args->dist;
 442   args->this_sse  += args->sse;
 443   args->this_rd += rd;
 444
 445   if (args->this_rd > args->best_rd) {
 446     args->skip = 1;
 447     return;
 448   }
 449 }
 450
 451 static void txfm_rd_in_plane(MACROBLOCK *x,
 452                              int *rate, int64_t *distortion,
 453                              int *skippable, int64_t *sse,
 454                              int64_t ref_best_rd, int plane,
 455                              BLOCK_SIZE bsize, TX_SIZE tx_size,
 456                              int use_fast_coef_casting) {
 457   MACROBLOCKD *const xd = &x->e_mbd;
 458   const struct macroblockd_plane *const pd = &xd->plane[plane];
 459   struct rdcost_block_args args;
 460   vp9_zero(args);
 461   args.x = x;
 462   args.best_rd = ref_best_rd;
 463   args.use_fast_coef_costing = use_fast_coef_casting;
 464
 465   if (plane == 0)
 466     xd->mi[0].src_mi->mbmi.tx_size = tx_size;
 467
 468   vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
 469
 470   args.so = get_scan(xd, tx_size, pd->plane_type, 0);
 471
 472   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
 473                                          block_rd_txfm, &args);
 474   if (args.skip) {
 475     *rate       = INT_MAX;
 476     *distortion = INT64_MAX;
 477     *sse        = INT64_MAX;
 478     *skippable  = 0;
 479   } else {
 480     *distortion = args.this_dist;
 481     *rate       = args.this_rate;
 482     *sse        = args.this_sse;
 483     *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
 484   }
 485 }
 486
 487 static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
 488                                    int *rate, int64_t *distortion,
 489                                    int *skip, int64_t *sse,
 490                                    int64_t ref_best_rd,
 491                                    BLOCK_SIZE bs) {
 492   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 493   VP9_COMMON *const cm = &cpi->common;
 494   const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
 495   MACROBLOCKD *const xd = &x->e_mbd;
 496   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
 497
 498   mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
 499
 500   txfm_rd_in_plane(x, rate, distortion, skip,
 501                    sse, ref_best_rd, 0, bs,
 502                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 503 }
 504
// Evaluates the luma plane at each transform size from the largest one the
// block allows down to TX_4X4, and selects a transform size by RD cost.
//
// Outputs:
//   mbmi->tx_size   the RD-best size when cm->tx_mode is TX_MODE_SELECT,
//                   otherwise the largest size allowed by block and tx_mode.
//   *rate, *distortion, *skip, *psse
//                   results for the chosen size; the rate includes the
//                   transform size signalling cost only under TX_MODE_SELECT.
//   tx_cache[]      per-tx_mode RD costs (INT64_MAX where not evaluated).
//
// NOTE(review): when the early-termination `break` fires, the r/d/s/sse
// entries for smaller sizes are never written. Under a tx_mode other than
// TX_MODE_SELECT the final index MIN(max_tx_size, max_mode_tx_size) can be
// smaller than max_tx_size -- confirm that this combination cannot reach an
// unwritten entry.
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t tx_cache[TX_MODES],
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  // Per transform size n:
  //   r[n][0]  coefficient rate; r[n][1] additionally includes the cost of
  //            signalling size n
  //   s[n]     skippable flag, d[n] distortion, sse[n] sse
  //   rd[n][0] / rd[n][1]  RD cost built from r[n][0] / r[n][1]
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  // Cost of coding the skip flag as 0 / as 1.
  int s0, s1;
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = max_tx_size; n >= 0;  n--) {
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      // Add the cost of signalling size n: a "one" for every smaller size
      // and a terminating "zero", which is omitted for the largest size.
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      // Skippable: only the skip flag is costed.
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    // Early termination in transform size search.
    // Stops when this size could not be evaluated, is worse than the next
    // larger size, or is skippable. Note that the break happens before
    // best_tx / best_rd are updated for this size.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
        s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);


  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
  *psse       = sse[mbmi->tx_size];

  // RD cost under each fixed tx_mode (no size signalling cost). The two
  // largest modes are clamped to the largest size the block allows.
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];

  // RD cost under TX_MODE_SELECT (includes the size signalling cost).
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
  } else {
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
  }
}
 593
 594 static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 595                             int64_t *distortion, int *skip,
 596                             int64_t *psse, BLOCK_SIZE bs,
 597                             int64_t txfm_cache[TX_MODES],
 598                             int64_t ref_best_rd) {
 599   MACROBLOCKD *xd = &x->e_mbd;
 600   int64_t sse;
 601   int64_t *ret_sse = psse ? psse : &sse;
 602
 603   assert(bs == xd->mi[0].src_mi->mbmi.sb_type);
 604
 605   if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
 606     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
 607     choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
 608                            bs);
 609   } else {
 610     choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
 611                            txfm_cache, ref_best_rd, bs);
 612   }
 613 }
 614
 615 static int conditional_skipintra(PREDICTION_MODE mode,
 616                                  PREDICTION_MODE best_intra_mode) {
 617   if (mode == D117_PRED &&
 618       best_intra_mode != V_PRED &&
 619       best_intra_mode != D135_PRED)
 620     return 1;
 621   if (mode == D63_PRED &&
 622       best_intra_mode != V_PRED &&
 623       best_intra_mode != D45_PRED)
 624     return 1;
 625   if (mode == D207_PRED &&
 626       best_intra_mode != H_PRED &&
 627       best_intra_mode != D45_PRED)
 628     return 1;
 629   if (mode == D153_PRED &&
 630       best_intra_mode != H_PRED &&
 631       best_intra_mode != D135_PRED)
 632     return 1;
 633   return 0;
 634 }
 635
// Picks the best intra prediction mode for one sub-block (starting at 4x4
// block index `ib`, covering the 4x4 blocks implied by `bsize`) of an 8x8
// luma block.
//
// Every mode from DC_PRED to TM_PRED that passes the speed-feature filters
// is tried: each 4x4 block is predicted, transformed, quantized, costed and
// reconstructed in place. A mode is abandoned as soon as its RD cost reaches
// the best cost so far.
//
//   best_mode       in: best mode so far (used by the oblique-mode filter);
//                   out: winning mode, if any mode beat rd_thresh.
//   bmode_costs     mode signalling cost, indexed by prediction mode.
//   a, l            above/left entropy contexts; updated to the winning
//                   mode's contexts.
//   bestrate, bestratey, bestdistortion
//                   total rate, coefficient-only rate and distortion of the
//                   winning mode; untouched if no mode beat rd_thresh.
//   rd_thresh       RD cost a mode has to beat.
//
// Returns the best RD cost found, or rd_thresh if no mode improved on it.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;

  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
  // ta/tl: contexts on entry; tempa/templ: working copies for each mode.
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  // Reconstructed pixels of the best mode so far (row stride 8).
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    // Mode disabled by the speed features for 4x4 transforms.
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
          continue;
    }

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = ib + idy * 2 + idx;
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0].src_mi->bmi[block].as_mode = mode;
        // With skip_encode set, prediction reads from the source buffer
        // instead of the reconstruction buffer.
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, idx, idy, 0);
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          // Lossless: Walsh-Hadamard transform. No distortion is
          // accumulated on this path, so only the rate drives the cost.
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          // Lossy: transform type and scan order depend on the block.
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                             so->scan, so->neighbors,
                             cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      // Save this mode's reconstruction; later modes overwrite dst.
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  // Jump target for modes abandoned part-way through.
  next:
    {}
  }

  // Nothing beat the threshold, or reconstruction is not needed.
  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  // Put the winning mode's reconstruction back into the destination buffer.
  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}
 762
 763 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
 764                                             int *rate, int *rate_y,
 765                                             int64_t *distortion,
 766                                             int64_t best_rd) {
 767   int i, j;
 768   const MACROBLOCKD *const xd = &mb->e_mbd;
 769   MODE_INFO *const mic = xd->mi[0].src_mi;
 770   const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
 771   const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
 772   const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type;
 773   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
 774   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
 775   int idx, idy;
 776   int cost = 0;
 777   int64_t total_distortion = 0;
 778   int tot_rate_y = 0;
 779   int64_t total_rd = 0;
 780   ENTROPY_CONTEXT t_above[4], t_left[4];
 781   const int *bmode_costs = cpi->mbmode_cost;
 782
 783   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
 784   vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
 785
 786   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
 787   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
 788     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
 789       PREDICTION_MODE best_mode = DC_PRED;
 790       int r = INT_MAX, ry = INT_MAX;
 791       int64_t d = INT64_MAX, this_rd = INT64_MAX;
 792       i = idy * 2 + idx;
 793       if (cpi->common.frame_type == KEY_FRAME) {
 794         const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
 795         const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
 796
 797         bmode_costs  = cpi->y_mode_costs[A][L];
 798       }
 799
 800       this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
 801                                       t_above + idx, t_left + idy, &r, &ry, &d,
 802                                       bsize, best_rd - total_rd);
 803       if (this_rd >= best_rd - total_rd)
 804         return INT64_MAX;
 805
 806       total_rd += this_rd;
 807       cost += r;
 808       total_distortion += d;
 809       tot_rate_y += ry;
 810
 811       mic->bmi[i].as_mode = best_mode;
 812       for (j = 1; j < num_4x4_blocks_high; ++j)
 813         mic->bmi[i + j * 2].as_mode = best_mode;
 814       for (j = 1; j < num_4x4_blocks_wide; ++j)
 815         mic->bmi[i + j].as_mode = best_mode;
 816
 817       if (total_rd >= best_rd)
 818         return INT64_MAX;
 819     }
 820   }
 821
 822   *rate = cost;
 823   *rate_y = tot_rate_y;
 824   *distortion = total_distortion;
 825   mic->mbmi.mode = mic->bmi[3].as_mode;
 826
 827   return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
 828 }
 829
 830 // This function is used only for intra_only frames
 831 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
 832                                       int *rate, int *rate_tokenonly,
 833                                       int64_t *distortion, int *skippable,
 834                                       BLOCK_SIZE bsize,
 835                                       int64_t tx_cache[TX_MODES],
 836                                       int64_t best_rd) {
 837   PREDICTION_MODE mode;
 838   PREDICTION_MODE mode_selected = DC_PRED;
 839   MACROBLOCKD *const xd = &x->e_mbd;
 840   MODE_INFO *const mic = xd->mi[0].src_mi;
 841   int this_rate, this_rate_tokenonly, s;
 842   int64_t this_distortion, this_rd;
 843   TX_SIZE best_tx = TX_4X4;
 844   int i;
 845   int *bmode_costs;
 846   const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
 847   const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
 848   const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
 849   const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
 850   bmode_costs = cpi->y_mode_costs[A][L];
 851
 852   if (cpi->sf.tx_size_search_method == USE_FULL_RD)
 853     for (i = 0; i < TX_MODES; i++)
 854       tx_cache[i] = INT64_MAX;
 855
 856   /* Y Search for intra prediction mode */
 857   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
 858     int64_t local_tx_cache[TX_MODES];
 859     mic->mbmi.mode = mode;
 860
 861     super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
 862         &s, NULL, bsize, local_tx_cache, best_rd);
 863
 864     if (this_rate_tokenonly == INT_MAX)
 865       continue;
 866
 867     this_rate = this_rate_tokenonly + bmode_costs[mode];
 868     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 869
 870     if (this_rd < best_rd) {
 871       mode_selected   = mode;
 872       best_rd         = this_rd;
 873       best_tx         = mic->mbmi.tx_size;
 874       *rate           = this_rate;
 875       *rate_tokenonly = this_rate_tokenonly;
 876       *distortion     = this_distortion;
 877       *skippable      = s;
 878     }
 879
 880     if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
 881       for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
 882         const int64_t adj_rd = this_rd + local_tx_cache[i] -
 883             local_tx_cache[cpi->common.tx_mode];
 884         if (adj_rd < tx_cache[i]) {
 885           tx_cache[i] = adj_rd;
 886         }
 887       }
 888     }
 889   }
 890
 891   mic->mbmi.mode = mode_selected;
 892   mic->mbmi.tx_size = best_tx;
 893
 894   return best_rd;
 895 }
 896
 897 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
 898                              int *rate, int64_t *distortion, int *skippable,
 899                              int64_t *sse, BLOCK_SIZE bsize,
 900                              int64_t ref_best_rd) {
 901   MACROBLOCKD *const xd = &x->e_mbd;
 902   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
 903   const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
 904   int plane;
 905   int pnrate = 0, pnskip = 1;
 906   int64_t pndist = 0, pnsse = 0;
 907
 908   if (ref_best_rd < 0)
 909     goto term;
 910
 911   if (is_inter_block(mbmi)) {
 912     int plane;
 913     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
 914       vp9_subtract_plane(x, bsize, plane);
 915   }
 916
 917   *rate = 0;
 918   *distortion = 0;
 919   *sse = 0;
 920   *skippable = 1;
 921
 922   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
 923     txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
 924                      ref_best_rd, plane, bsize, uv_tx_size,
 925                      cpi->sf.use_fast_coef_costing);
 926     if (pnrate == INT_MAX)
 927       goto term;
 928     *rate += pnrate;
 929     *distortion += pndist;
 930     *sse += pnsse;
 931     *skippable &= pnskip;
 932   }
 933   return;
 934
 935   term:
 936   *rate = INT_MAX;
 937   *distortion = INT64_MAX;
 938   *sse = INT64_MAX;
 939   *skippable = 0;
 940   return;
 941 }
 942
 943 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
 944                                        PICK_MODE_CONTEXT *ctx,
 945                                        int *rate, int *rate_tokenonly,
 946                                        int64_t *distortion, int *skippable,
 947                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
 948   MACROBLOCKD *xd = &x->e_mbd;
 949   PREDICTION_MODE mode;
 950   PREDICTION_MODE mode_selected = DC_PRED;
 951   int64_t best_rd = INT64_MAX, this_rd;
 952   int this_rate_tokenonly, this_rate, s;
 953   int64_t this_distortion, this_sse;
 954
 955   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
 956     if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
 957       continue;
 958
 959     xd->mi[0].src_mi->mbmi.uv_mode = mode;
 960
 961     super_block_uvrd(cpi, x, &this_rate_tokenonly,
 962                      &this_distortion, &s, &this_sse, bsize, best_rd);
 963     if (this_rate_tokenonly == INT_MAX)
 964       continue;
 965     this_rate = this_rate_tokenonly +
 966                 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
 967     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 968
 969     if (this_rd < best_rd) {
 970       mode_selected   = mode;
 971       best_rd         = this_rd;
 972       *rate           = this_rate;
 973       *rate_tokenonly = this_rate_tokenonly;
 974       *distortion     = this_distortion;
 975       *skippable      = s;
 976       if (!x->select_tx_size)
 977         swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
 978     }
 979   }
 980
 981   xd->mi[0].src_mi->mbmi.uv_mode = mode_selected;
 982   return best_rd;
 983 }
 984
 985 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
 986                               int *rate, int *rate_tokenonly,
 987                               int64_t *distortion, int *skippable,
 988                               BLOCK_SIZE bsize) {
 989   const VP9_COMMON *cm = &cpi->common;
 990   int64_t unused;
 991
 992   x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED;
 993   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
 994                    skippable, &unused, bsize, INT64_MAX);
 995   *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
 996   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 997 }
 998
 999 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1000                                  BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1001                                  int *rate_uv, int *rate_uv_tokenonly,
1002                                  int64_t *dist_uv, int *skip_uv,
1003                                  PREDICTION_MODE *mode_uv) {
1004   MACROBLOCK *const x = &cpi->mb;
1005
1006   // Use an estimated rd for uv_intra based on DC_PRED if the
1007   // appropriate speed flag is set.
1008   if (cpi->sf.use_uv_intra_rd_estimate) {
1009     rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
1010                    skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1011   // Else do a proper rd search for each possible transform size that may
1012   // be considered in the main rd loop.
1013   } else {
1014     rd_pick_intra_sbuv_mode(cpi, x, ctx,
1015                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1016                             bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
1017   }
1018   *mode_uv = x->e_mbd.mi[0].src_mi->mbmi.uv_mode;
1019 }
1020
1021 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1022                        int mode_context) {
1023   assert(is_inter_mode(mode));
1024   return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1025 }
1026
1027 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1028                                 BLOCK_SIZE bsize,
1029                                 int_mv *frame_mv,
1030                                 int mi_row, int mi_col,
1031                                 int_mv single_newmv[MAX_REF_FRAMES],
1032                                 int *rate_mv);
1033
1034 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
1035                                 PREDICTION_MODE mode, int_mv this_mv[2],
1036                                 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1037                                 int_mv seg_mvs[MAX_REF_FRAMES],
1038                                 int_mv *best_ref_mv[2], const int *mvjcost,
1039                                 int *mvcost[2]) {
1040   MODE_INFO *const mic = xd->mi[0].src_mi;
1041   const MB_MODE_INFO *const mbmi = &mic->mbmi;
1042   int thismvcost = 0;
1043   int idx, idy;
1044   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1045   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1046   const int is_compound = has_second_ref(mbmi);
1047
1048   switch (mode) {
1049     case NEWMV:
1050       this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1051       thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1052                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1053       if (is_compound) {
1054         this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1055         thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1056                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1057       }
1058       break;
1059     case NEARMV:
1060     case NEARESTMV:
1061       this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
1062       if (is_compound)
1063         this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
1064       break;
1065     case ZEROMV:
1066       this_mv[0].as_int = 0;
1067       if (is_compound)
1068         this_mv[1].as_int = 0;
1069       break;
1070     default:
1071       break;
1072   }
1073
1074   mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1075   if (is_compound)
1076     mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1077
1078   mic->bmi[i].as_mode = mode;
1079
1080   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1081     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1082       vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1083                  &mic->bmi[i], sizeof(mic->bmi[i]));
1084
1085   return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
1086             thismvcost;
1087 }
1088
1089 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1090                                        MACROBLOCK *x,
1091                                        int64_t best_yrd,
1092                                        int i,
1093                                        int *labelyrate,
1094                                        int64_t *distortion, int64_t *sse,
1095                                        ENTROPY_CONTEXT *ta,
1096                                        ENTROPY_CONTEXT *tl,
1097                                        int mi_row, int mi_col) {
1098   int k;
1099   MACROBLOCKD *xd = &x->e_mbd;
1100   struct macroblockd_plane *const pd = &xd->plane[0];
1101   struct macroblock_plane *const p = &x->plane[0];
1102   MODE_INFO *const mi = xd->mi[0].src_mi;
1103   const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1104   const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1105   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1106   int idx, idy;
1107
1108   const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1109                                                              p->src.stride)];
1110   uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1111                                                         pd->dst.stride)];
1112   int64_t thisdistortion = 0, thissse = 0;
1113   int thisrate = 0, ref;
1114   const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1115   const int is_compound = has_second_ref(&mi->mbmi);
1116   const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
1117
1118   for (ref = 0; ref < 1 + is_compound; ++ref) {
1119     const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1120                                                pd->pre[ref].stride)];
1121     vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1122                               dst, pd->dst.stride,
1123                               &mi->bmi[i].as_mv[ref].as_mv,
1124                               &xd->block_refs[ref]->sf, width, height, ref,
1125                               kernel, MV_PRECISION_Q3,
1126                               mi_col * MI_SIZE + 4 * (i % 2),
1127                               mi_row * MI_SIZE + 4 * (i / 2));
1128   }
1129
1130   vp9_subtract_block(height, width,
1131                      raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1132                      src, p->src.stride,
1133                      dst, pd->dst.stride);
1134
1135   k = i;
1136   for (idy = 0; idy < height / 4; ++idy) {
1137     for (idx = 0; idx < width / 4; ++idx) {
1138       int64_t ssz, rd, rd1, rd2;
1139       tran_low_t* coeff;
1140
1141       k += (idy * 2 + idx);
1142       coeff = BLOCK_OFFSET(p->coeff, k);
1143       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1144                     coeff, 8);
1145       vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1146       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1147                                         16, &ssz);
1148       thissse += ssz;
1149       thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1150                               so->scan, so->neighbors,
1151                               cpi->sf.use_fast_coef_costing);
1152       rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1153       rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1154       rd = MIN(rd1, rd2);
1155       if (rd >= best_yrd)
1156         return INT64_MAX;
1157     }
1158   }
1159
1160   *distortion = thisdistortion >> 2;
1161   *labelyrate = thisrate;
1162   *sse = thissse >> 2;
1163
1164   return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1165 }
1166
1167 typedef struct {
1168   int eobs;
1169   int brate;
1170   int byrate;
1171   int64_t bdist;
1172   int64_t bsse;
1173   int64_t brdcost;
1174   int_mv mvs[2];
1175   ENTROPY_CONTEXT ta[2];
1176   ENTROPY_CONTEXT tl[2];
1177 } SEG_RDSTAT;
1178
1179 typedef struct {
1180   int_mv *ref_mv[2];
1181   int_mv mvp;
1182
1183   int64_t segment_rd;
1184   int r;
1185   int64_t d;
1186   int64_t sse;
1187   int segment_yrate;
1188   PREDICTION_MODE modes[4];
1189   SEG_RDSTAT rdstat[4][INTER_MODES];
1190   int mvthresh;
1191 } BEST_SEG_INFO;
1192
1193 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
1194   return (mv->row >> 3) < x->mv_row_min ||
1195          (mv->row >> 3) > x->mv_row_max ||
1196          (mv->col >> 3) < x->mv_col_min ||
1197          (mv->col >> 3) > x->mv_col_max;
1198 }
1199
1200 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1201   MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
1202   struct macroblock_plane *const p = &x->plane[0];
1203   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1204
1205   p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1206   assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1207   pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1208                                                        pd->pre[0].stride)];
1209   if (has_second_ref(mbmi))
1210     pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1211                                                          pd->pre[1].stride)];
1212 }
1213
1214 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1215                                   struct buf_2d orig_pre[2]) {
1216   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
1217   x->plane[0].src = orig_src;
1218   x->e_mbd.plane[0].pre[0] = orig_pre[0];
1219   if (has_second_ref(mbmi))
1220     x->e_mbd.plane[0].pre[1] = orig_pre[1];
1221 }
1222
1223 static INLINE int mv_has_subpel(const MV *mv) {
1224   return (mv->row & 0x0F) || (mv->col & 0x0F);
1225 }
1226
1227 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
1228 // TODO(aconverse): Find out if this is still productive then clean up or remove
1229 static int check_best_zero_mv(
1230     const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
1231     int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
1232     const MV_REFERENCE_FRAME ref_frames[2]) {
1233   if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1234       frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1235       (ref_frames[1] == NONE ||
1236        frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1237     int rfc = mode_context[ref_frames[0]];
1238     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1239     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1240     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1241
1242     if (this_mode == NEARMV) {
1243       if (c1 > c3) return 0;
1244     } else if (this_mode == NEARESTMV) {
1245       if (c2 > c3) return 0;
1246     } else {
1247       assert(this_mode == ZEROMV);
1248       if (ref_frames[1] == NONE) {
1249         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1250             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1251           return 0;
1252       } else {
1253         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1254              frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1255             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1256              frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1257           return 0;
1258       }
1259     }
1260   }
1261   return 1;
1262 }
1263
1264 static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
1265                                         const TileInfo * const tile,
1266                                         int_mv *best_ref_mv,
1267                                         int_mv *second_best_ref_mv,
1268                                         int64_t best_rd, int *returntotrate,
1269                                         int *returnyrate,
1270                                         int64_t *returndistortion,
1271                                         int *skippable, int64_t *psse,
1272                                         int mvthresh,
1273                                         int_mv seg_mvs[4][MAX_REF_FRAMES],
1274                                         BEST_SEG_INFO *bsi_buf, int filter_idx,
1275                                         int mi_row, int mi_col) {
1276   int i;
1277   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1278   MACROBLOCKD *xd = &x->e_mbd;
1279   MODE_INFO *mi = xd->mi[0].src_mi;
1280   MB_MODE_INFO *mbmi = &mi->mbmi;
1281   int mode_idx;
1282   int k, br = 0, idx, idy;
1283   int64_t bd = 0, block_sse = 0;
1284   PREDICTION_MODE this_mode;
1285   VP9_COMMON *cm = &cpi->common;
1286   struct macroblock_plane *const p = &x->plane[0];
1287   struct macroblockd_plane *const pd = &xd->plane[0];
1288   const int label_count = 4;
1289   int64_t this_segment_rd = 0;
1290   int label_mv_thresh;
1291   int segmentyrate = 0;
1292   const BLOCK_SIZE bsize = mbmi->sb_type;
1293   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1294   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1295   ENTROPY_CONTEXT t_above[2], t_left[2];
1296   int subpelmv = 1, have_ref = 0;
1297   const int has_second_rf = has_second_ref(mbmi);
1298   const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
1299
1300   vp9_zero(*bsi);
1301
1302   bsi->segment_rd = best_rd;
1303   bsi->ref_mv[0] = best_ref_mv;
1304   bsi->ref_mv[1] = second_best_ref_mv;
1305   bsi->mvp.as_int = best_ref_mv->as_int;
1306   bsi->mvthresh = mvthresh;
1307
1308   for (i = 0; i < 4; i++)
1309     bsi->modes[i] = ZEROMV;
1310
1311   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1312   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1313
1314   // 64 makes this threshold really big effectively
1315   // making it so that we very rarely check mvs on
1316   // segments.   setting this to 1 would make mv thresh
1317   // roughly equal to what it is for macroblocks
1318   label_mv_thresh = 1 * bsi->mvthresh / label_count;
1319
1320   // Segmentation method overheads
1321   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1322     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1323       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1324       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1325       int_mv mode_mv[MB_MODE_COUNT][2];
1326       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1327       PREDICTION_MODE mode_selected = ZEROMV;
1328       int64_t best_rd = INT64_MAX;
1329       const int i = idy * 2 + idx;
1330       int ref;
1331
1332       for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1333         const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1334         frame_mv[ZEROMV][frame].as_int = 0;
1335         vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1336                                       &frame_mv[NEARESTMV][frame],
1337                                       &frame_mv[NEARMV][frame]);
1338       }
1339
1340       // search for the best motion vector on this segment
1341       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1342         const struct buf_2d orig_src = x->plane[0].src;
1343         struct buf_2d orig_pre[2];
1344
1345         mode_idx = INTER_OFFSET(this_mode);
1346         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1347         if (!(inter_mode_mask & (1 << this_mode)))
1348           continue;
1349
1350         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1351                                 this_mode, mbmi->ref_frame))
1352           continue;
1353
1354         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1355         vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1356                    sizeof(bsi->rdstat[i][mode_idx].ta));
1357         vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1358                    sizeof(bsi->rdstat[i][mode_idx].tl));
1359
1360         // motion search for newmv (single predictor case only)
1361         if (!has_second_rf && this_mode == NEWMV &&
1362             seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1363           MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1364           int step_param = 0;
1365           int thissme, bestsme = INT_MAX;
1366           int sadpb = x->sadperbit4;
1367           MV mvp_full;
1368           int max_mv;
1369           int sad_list[5];
1370
1371           /* Is the best so far sufficiently good that we cant justify doing
1372            * and new motion search. */
1373           if (best_rd < label_mv_thresh)
1374             break;
1375
1376           if (cpi->oxcf.mode != BEST) {
1377             // use previous block's result as next block's MV predictor.
1378             if (i > 0) {
1379               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1380               if (i == 2)
1381                 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1382             }
1383           }
1384           if (i == 0)
1385             max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1386           else
1387             max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1388
1389           if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
1390             // Take wtd average of the step_params based on the last frame's
1391             // max mv magnitude and the best ref mvs of the current block for
1392             // the given reference.
1393             step_param = (vp9_init_search_range(max_mv) +
1394                               cpi->mv_step_param) / 2;
1395           } else {
1396             step_param = cpi->mv_step_param;
1397           }
1398
1399           mvp_full.row = bsi->mvp.as_mv.row >> 3;
1400           mvp_full.col = bsi->mvp.as_mv.col >> 3;
1401
1402           if (cpi->sf.adaptive_motion_search) {
1403             mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
1404             mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
1405             step_param = MAX(step_param, 8);
1406           }
1407
1408           // adjust src pointer for this block
1409           mi_buf_shift(x, i);
1410
1411           vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1412
1413           bestsme = vp9_full_pixel_search(
1414               cpi, x, bsize, &mvp_full, step_param, sadpb,
1415               cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL,
1416               &bsi->ref_mv[0]->as_mv, new_mv,
1417               INT_MAX, 1);
1418
1419           // Should we do a full search (best quality only)
1420           if (cpi->oxcf.mode == BEST) {
1421             int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1422             /* Check if mvp_full is within the range. */
1423             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1424                      x->mv_row_min, x->mv_row_max);
1425             thissme = cpi->full_search_sad(x, &mvp_full,
1426                                            sadpb, 16, &cpi->fn_ptr[bsize],
1427                                            &bsi->ref_mv[0]->as_mv,
1428                                            &best_mv->as_mv);
1429             sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX;
1430             if (thissme < bestsme) {
1431               bestsme = thissme;
1432               *new_mv = best_mv->as_mv;
1433             } else {
1434               // The full search result is actually worse so re-instate the
1435               // previous best vector
1436               best_mv->as_mv = *new_mv;
1437             }
1438           }
1439
1440           if (bestsme < INT_MAX) {
1441             int distortion;
1442             cpi->find_fractional_mv_step(
1443                 x,
1444                 new_mv,
1445                 &bsi->ref_mv[0]->as_mv,
1446                 cm->allow_high_precision_mv,
1447                 x->errorperbit, &cpi->fn_ptr[bsize],
1448                 cpi->sf.mv.subpel_force_stop,
1449                 cpi->sf.mv.subpel_iters_per_step,
1450                 cond_sad_list(cpi, sad_list),
1451                 x->nmvjointcost, x->mvcost,
1452                 &distortion,
1453                 &x->pred_sse[mbmi->ref_frame[0]],
1454                 NULL, 0, 0);
1455
1456             // save motion search result for use in compound prediction
1457             seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
1458           }
1459
1460           if (cpi->sf.adaptive_motion_search)
1461             x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
1462
1463           // restore src pointers
1464           mi_buf_restore(x, orig_src, orig_pre);
1465         }
1466
1467         if (has_second_rf) {
1468           if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1469               seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1470             continue;
1471         }
1472
1473         if (has_second_rf && this_mode == NEWMV &&
1474             mbmi->interp_filter == EIGHTTAP) {
1475           // adjust src pointers
1476           mi_buf_shift(x, i);
1477           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1478             int rate_mv;
1479             joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1480                                 mi_row, mi_col, seg_mvs[i],
1481                                 &rate_mv);
1482             seg_mvs[i][mbmi->ref_frame[0]].as_int =
1483                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1484             seg_mvs[i][mbmi->ref_frame[1]].as_int =
1485                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1486           }
1487           // restore src pointers
1488           mi_buf_restore(x, orig_src, orig_pre);
1489         }
1490
1491         bsi->rdstat[i][mode_idx].brate =
1492             set_and_cost_bmi_mvs(cpi, xd, i, this_mode, mode_mv[this_mode],
1493                                  frame_mv, seg_mvs[i], bsi->ref_mv,
1494                                  x->nmvjointcost, x->mvcost);
1495
1496         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1497           bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1498               mode_mv[this_mode][ref].as_int;
1499           if (num_4x4_blocks_wide > 1)
1500             bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1501                 mode_mv[this_mode][ref].as_int;
1502           if (num_4x4_blocks_high > 1)
1503             bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1504                 mode_mv[this_mode][ref].as_int;
1505         }
1506
1507         // Trap vectors that reach beyond the UMV borders
1508         if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1509             (has_second_rf &&
1510              mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
1511           continue;
1512
1513         if (filter_idx > 0) {
1514           BEST_SEG_INFO *ref_bsi = bsi_buf;
1515           subpelmv = 0;
1516           have_ref = 1;
1517
1518           for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1519             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1520             have_ref &= mode_mv[this_mode][ref].as_int ==
1521                 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1522           }
1523
1524           if (filter_idx > 1 && !subpelmv && !have_ref) {
1525             ref_bsi = bsi_buf + 1;
1526             have_ref = 1;
1527             for (ref = 0; ref < 1 + has_second_rf; ++ref)
1528               have_ref &= mode_mv[this_mode][ref].as_int ==
1529                   ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1530           }
1531
1532           if (!subpelmv && have_ref &&
1533               ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1534             vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1535                        sizeof(SEG_RDSTAT));
1536             if (num_4x4_blocks_wide > 1)
1537               bsi->rdstat[i + 1][mode_idx].eobs =
1538                   ref_bsi->rdstat[i + 1][mode_idx].eobs;
1539             if (num_4x4_blocks_high > 1)
1540               bsi->rdstat[i + 2][mode_idx].eobs =
1541                   ref_bsi->rdstat[i + 2][mode_idx].eobs;
1542
1543             if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1544               mode_selected = this_mode;
1545               best_rd = bsi->rdstat[i][mode_idx].brdcost;
1546             }
1547             continue;
1548           }
1549         }
1550
1551         bsi->rdstat[i][mode_idx].brdcost =
1552             encode_inter_mb_segment(cpi, x,
1553                                     bsi->segment_rd - this_segment_rd, i,
1554                                     &bsi->rdstat[i][mode_idx].byrate,
1555                                     &bsi->rdstat[i][mode_idx].bdist,
1556                                     &bsi->rdstat[i][mode_idx].bsse,
1557                                     bsi->rdstat[i][mode_idx].ta,
1558                                     bsi->rdstat[i][mode_idx].tl,
1559                                     mi_row, mi_col);
1560         if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1561           bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1562                                             bsi->rdstat[i][mode_idx].brate, 0);
1563           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1564           bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
1565           if (num_4x4_blocks_wide > 1)
1566             bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
1567           if (num_4x4_blocks_high > 1)
1568             bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
1569         }
1570
1571         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1572           mode_selected = this_mode;
1573           best_rd = bsi->rdstat[i][mode_idx].brdcost;
1574         }
1575       } /*for each 4x4 mode*/
1576
1577       if (best_rd == INT64_MAX) {
1578         int iy, midx;
1579         for (iy = i + 1; iy < 4; ++iy)
1580           for (midx = 0; midx < INTER_MODES; ++midx)
1581             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1582         bsi->segment_rd = INT64_MAX;
1583         return INT64_MAX;;
1584       }
1585
1586       mode_idx = INTER_OFFSET(mode_selected);
1587       vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
1588       vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
1589
1590       set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected],
1591                            frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
1592                            x->mvcost);
1593
1594       br += bsi->rdstat[i][mode_idx].brate;
1595       bd += bsi->rdstat[i][mode_idx].bdist;
1596       block_sse += bsi->rdstat[i][mode_idx].bsse;
1597       segmentyrate += bsi->rdstat[i][mode_idx].byrate;
1598       this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
1599
1600       if (this_segment_rd > bsi->segment_rd) {
1601         int iy, midx;
1602         for (iy = i + 1; iy < 4; ++iy)
1603           for (midx = 0; midx < INTER_MODES; ++midx)
1604             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1605         bsi->segment_rd = INT64_MAX;
1606         return INT64_MAX;;
1607       }
1608     }
1609   } /* for each label */
1610
1611   bsi->r = br;
1612   bsi->d = bd;
1613   bsi->segment_yrate = segmentyrate;
1614   bsi->segment_rd = this_segment_rd;
1615   bsi->sse = block_sse;
1616
1617   // update the coding decisions
1618   for (k = 0; k < 4; ++k)
1619     bsi->modes[k] = mi->bmi[k].as_mode;
1620
1621   if (bsi->segment_rd > best_rd)
1622     return INT64_MAX;
1623   /* set it to the best */
1624   for (i = 0; i < 4; i++) {
1625     mode_idx = INTER_OFFSET(bsi->modes[i]);
1626     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
1627     if (has_second_ref(mbmi))
1628       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
1629     x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
1630     mi->bmi[i].as_mode = bsi->modes[i];
1631   }
1632
1633   /*
1634    * used to set mbmi->mv.as_int
1635    */
1636   *returntotrate = bsi->r;
1637   *returndistortion = bsi->d;
1638   *returnyrate = bsi->segment_yrate;
1639   *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
1640   *psse = bsi->sse;
1641   mbmi->mode = bsi->modes[3];
1642
1643   return bsi->segment_rd;
1644 }
1645
1646 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
1647                                      const MACROBLOCKD *xd,
1648                                      int segment_id,
1649                                      unsigned int *ref_costs_single,
1650                                      unsigned int *ref_costs_comp,
1651                                      vp9_prob *comp_mode_p) {
1652   int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
1653                                              SEG_LVL_REF_FRAME);
1654   if (seg_ref_active) {
1655     vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
1656     vpx_memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
1657     *comp_mode_p = 128;
1658   } else {
1659     vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
1660     vp9_prob comp_inter_p = 128;
1661
1662     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
1663       comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
1664       *comp_mode_p = comp_inter_p;
1665     } else {
1666       *comp_mode_p = 128;
1667     }
1668
1669     ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
1670
1671     if (cm->reference_mode != COMPOUND_REFERENCE) {
1672       vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
1673       vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
1674       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1675
1676       if (cm->reference_mode == REFERENCE_MODE_SELECT)
1677         base_cost += vp9_cost_bit(comp_inter_p, 0);
1678
1679       ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
1680           ref_costs_single[ALTREF_FRAME] = base_cost;
1681       ref_costs_single[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
1682       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1683       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1684       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
1685       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
1686     } else {
1687       ref_costs_single[LAST_FRAME]   = 512;
1688       ref_costs_single[GOLDEN_FRAME] = 512;
1689       ref_costs_single[ALTREF_FRAME] = 512;
1690     }
1691     if (cm->reference_mode != SINGLE_REFERENCE) {
1692       vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
1693       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1694
1695       if (cm->reference_mode == REFERENCE_MODE_SELECT)
1696         base_cost += vp9_cost_bit(comp_inter_p, 1);
1697
1698       ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
1699       ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
1700     } else {
1701       ref_costs_comp[LAST_FRAME]   = 512;
1702       ref_costs_comp[GOLDEN_FRAME] = 512;
1703     }
1704   }
1705 }
1706
1707 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1708                          int mode_index,
1709                          int64_t comp_pred_diff[REFERENCE_MODES],
1710                          const int64_t tx_size_diff[TX_MODES],
1711                          int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
1712                          int skippable) {
1713   MACROBLOCKD *const xd = &x->e_mbd;
1714
1715   // Take a snapshot of the coding context so it can be
1716   // restored if we decide to encode this way
1717   ctx->skip = x->skip;
1718   ctx->skippable = skippable;
1719   ctx->best_mode_index = mode_index;
1720   ctx->mic = *xd->mi[0].src_mi;
1721   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
1722   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
1723   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
1724
1725   vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
1726   vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
1727              sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
1728 }
1729
1730 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
1731                                const TileInfo *const tile,
1732                                MV_REFERENCE_FRAME ref_frame,
1733                                BLOCK_SIZE block_size,
1734                                int mi_row, int mi_col,
1735                                int_mv frame_nearest_mv[MAX_REF_FRAMES],
1736                                int_mv frame_near_mv[MAX_REF_FRAMES],
1737                                struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
1738   const VP9_COMMON *cm = &cpi->common;
1739   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
1740   MACROBLOCKD *const xd = &x->e_mbd;
1741   MODE_INFO *const mi = xd->mi[0].src_mi;
1742   int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
1743   const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
1744
1745   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
1746   // use the UV scaling factors.
1747   vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
1748
1749   // Gets an initial list of candidate vectors from neighbours and orders them
1750   vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
1751
1752   // Candidate refinement carried out at encoder and decoder
1753   vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
1754                         &frame_nearest_mv[ref_frame],
1755                         &frame_near_mv[ref_frame]);
1756
1757   // Further refinement that is encode side only to test the top few candidates
1758   // in full and choose the best as the centre point for subsequent searches.
1759   // The current implementation doesn't support scaling.
1760   if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
1761     vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
1762                 ref_frame, block_size);
1763 }
1764
1765 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1766                                  BLOCK_SIZE bsize,
1767                                  int mi_row, int mi_col,
1768                                  int_mv *tmp_mv, int *rate_mv) {
1769   MACROBLOCKD *xd = &x->e_mbd;
1770   const VP9_COMMON *cm = &cpi->common;
1771   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
1772   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
1773   int bestsme = INT_MAX;
1774   int step_param;
1775   int sadpb = x->sadperbit16;
1776   MV mvp_full;
1777   int ref = mbmi->ref_frame[0];
1778   MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
1779
1780   int tmp_col_min = x->mv_col_min;
1781   int tmp_col_max = x->mv_col_max;
1782   int tmp_row_min = x->mv_row_min;
1783   int tmp_row_max = x->mv_row_max;
1784   int sad_list[5];
1785
1786   const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
1787                                                                         ref);
1788
1789   MV pred_mv[3];
1790   pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
1791   pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
1792   pred_mv[2] = x->pred_mv[ref];
1793
1794   if (scaled_ref_frame) {
1795     int i;
1796     // Swap out the reference frame for a version that's been scaled to
1797     // match the resolution of the current frame, allowing the existing
1798     // motion search code to be used without additional modifications.
1799     for (i = 0; i < MAX_MB_PLANE; i++)
1800       backup_yv12[i] = xd->plane[i].pre[0];
1801
1802     vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
1803   }
1804
1805   vp9_set_mv_search_range(x, &ref_mv);
1806
1807   // Work out the size of the first step in the mv step search.
1808   // 0 here is maximum length first step. 1 is MAX >> 1 etc.
1809   if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
1810     // Take wtd average of the step_params based on the last frame's
1811     // max mv magnitude and that based on the best ref mvs of the current
1812     // block for the given reference.
1813     step_param = (vp9_init_search_range(x->max_mv_context[ref]) +
1814                     cpi->mv_step_param) / 2;
1815   } else {
1816     step_param = cpi->mv_step_param;
1817   }
1818
1819   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
1820     int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
1821                                                        b_width_log2(bsize)));
1822     step_param = MAX(step_param, boffset);
1823   }
1824
1825   if (cpi->sf.adaptive_motion_search) {
1826     int bwl = b_width_log2(bsize);
1827     int bhl = b_height_log2(bsize);
1828     int i;
1829     int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
1830
1831     if (tlevel < 5)
1832       step_param += 2;
1833
1834     for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
1835       if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
1836         x->pred_mv[ref].row = 0;
1837         x->pred_mv[ref].col = 0;
1838         tmp_mv->as_int = INVALID_MV;
1839
1840         if (scaled_ref_frame) {
1841           int i;
1842           for (i = 0; i < MAX_MB_PLANE; i++)
1843             xd->plane[i].pre[0] = backup_yv12[i];
1844         }
1845         return;
1846       }
1847     }
1848   }
1849
1850   mvp_full = pred_mv[x->mv_best_ref_index[ref]];
1851
1852   mvp_full.col >>= 3;
1853   mvp_full.row >>= 3;
1854
1855   bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
1856                                   cond_sad_list(cpi, sad_list),
1857                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
1858
1859   x->mv_col_min = tmp_col_min;
1860   x->mv_col_max = tmp_col_max;
1861   x->mv_row_min = tmp_row_min;
1862   x->mv_row_max = tmp_row_max;
1863
1864   if (bestsme < INT_MAX) {
1865     int dis;  /* TODO: use dis in distortion calculation later. */
1866     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
1867                                  cm->allow_high_precision_mv,
1868                                  x->errorperbit,
1869                                  &cpi->fn_ptr[bsize],
1870                                  cpi->sf.mv.subpel_force_stop,
1871                                  cpi->sf.mv.subpel_iters_per_step,
1872                                  cond_sad_list(cpi, sad_list),
1873                                  x->nmvjointcost, x->mvcost,
1874                                  &dis, &x->pred_sse[ref], NULL, 0, 0);
1875   }
1876   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
1877                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
1878
1879   if (cpi->sf.adaptive_motion_search)
1880     x->pred_mv[ref] = tmp_mv->as_mv;
1881
1882   if (scaled_ref_frame) {
1883     int i;
1884     for (i = 0; i < MAX_MB_PLANE; i++)
1885       xd->plane[i].pre[0] = backup_yv12[i];
1886   }
1887 }
1888
1889 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1890                                 BLOCK_SIZE bsize,
1891                                 int_mv *frame_mv,
1892                                 int mi_row, int mi_col,
1893                                 int_mv single_newmv[MAX_REF_FRAMES],
1894                                 int *rate_mv) {
1895   const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
1896   const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
1897   MACROBLOCKD *xd = &x->e_mbd;
1898   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
1899   const int refs[2] = { mbmi->ref_frame[0],
1900                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1901   int_mv ref_mv[2];
1902   int ite, ref;
1903   // Prediction buffer from second frame.
1904   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
1905   const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
1906
1907   // Do joint motion search in compound mode to get more accurate mv.
1908   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
1909   struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
1910   int last_besterr[2] = {INT_MAX, INT_MAX};
1911   const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
1912     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
1913     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
1914   };
1915
1916   for (ref = 0; ref < 2; ++ref) {
1917     ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
1918
1919     if (scaled_ref_frame[ref]) {
1920       int i;
1921       // Swap out the reference frame for a version that's been scaled to
1922       // match the resolution of the current frame, allowing the existing
1923       // motion search code to be used without additional modifications.
1924       for (i = 0; i < MAX_MB_PLANE; i++)
1925         backup_yv12[ref][i] = xd->plane[i].pre[ref];
1926       vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
1927                            NULL);
1928     }
1929
1930     frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
1931   }
1932
1933   // Allow joint search multiple times iteratively for each ref frame
1934   // and break out the search loop if it couldn't find better mv.
1935   for (ite = 0; ite < 4; ite++) {
1936     struct buf_2d ref_yv12[2];
1937     int bestsme = INT_MAX;
1938     int sadpb = x->sadperbit16;
1939     MV tmp_mv;
1940     int search_range = 3;
1941
1942     int tmp_col_min = x->mv_col_min;
1943     int tmp_col_max = x->mv_col_max;
1944     int tmp_row_min = x->mv_row_min;
1945     int tmp_row_max = x->mv_row_max;
1946     int id = ite % 2;
1947
1948     // Initialized here because of compiler problem in Visual Studio.
1949     ref_yv12[0] = xd->plane[0].pre[0];
1950     ref_yv12[1] = xd->plane[0].pre[1];
1951
1952     // Get pred block from second frame.
1953     vp9_build_inter_predictor(ref_yv12[!id].buf,
1954                               ref_yv12[!id].stride,
1955                               second_pred, pw,
1956                               &frame_mv[refs[!id]].as_mv,
1957                               &xd->block_refs[!id]->sf,
1958                               pw, ph, 0,
1959                               kernel, MV_PRECISION_Q3,
1960                               mi_col * MI_SIZE, mi_row * MI_SIZE);
1961
1962     // Compound motion search on first ref frame.
1963     if (id)
1964       xd->plane[0].pre[0] = ref_yv12[id];
1965     vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
1966
1967     // Use mv result from single mode as mvp.
1968     tmp_mv = frame_mv[refs[id]].as_mv;
1969
1970     tmp_mv.col >>= 3;
1971     tmp_mv.row >>= 3;
1972
1973     // Small-range full-pixel motion search
1974     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
1975                                        search_range,
1976                                        &cpi->fn_ptr[bsize],
1977                                        &ref_mv[id].as_mv, second_pred);
1978     if (bestsme < INT_MAX)
1979       bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
1980                                       second_pred, &cpi->fn_ptr[bsize], 1);
1981
1982     x->mv_col_min = tmp_col_min;
1983     x->mv_col_max = tmp_col_max;
1984     x->mv_row_min = tmp_row_min;
1985     x->mv_row_max = tmp_row_max;
1986
1987     if (bestsme < INT_MAX) {
1988       int dis; /* TODO: use dis in distortion calculation later. */
1989       unsigned int sse;
1990       bestsme = cpi->find_fractional_mv_step(
1991           x, &tmp_mv,
1992           &ref_mv[id].as_mv,
1993           cpi->common.allow_high_precision_mv,
1994           x->errorperbit,
1995           &cpi->fn_ptr[bsize],
1996           0, cpi->sf.mv.subpel_iters_per_step,
1997           NULL,
1998           x->nmvjointcost, x->mvcost,
1999           &dis, &sse, second_pred,
2000           pw, ph);
2001     }
2002
2003     if (id)
2004       xd->plane[0].pre[0] = scaled_first_yv12;
2005
2006     if (bestsme < last_besterr[id]) {
2007       frame_mv[refs[id]].as_mv = tmp_mv;
2008       last_besterr[id] = bestsme;
2009     } else {
2010       break;
2011     }
2012   }
2013
2014   *rate_mv = 0;
2015
2016   for (ref = 0; ref < 2; ++ref) {
2017     if (scaled_ref_frame[ref]) {
2018       // restore the predictor
2019       int i;
2020       for (i = 0; i < MAX_MB_PLANE; i++)
2021         xd->plane[i].pre[ref] = backup_yv12[ref][i];
2022     }
2023
2024     *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2025                                 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2026                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2027   }
2028
2029   vpx_free(second_pred);
2030 }
2031
2032 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2033                                    uint8_t *orig_dst[MAX_MB_PLANE],
2034                                    int orig_dst_stride[MAX_MB_PLANE]) {
2035   int i;
2036   for (i = 0; i < MAX_MB_PLANE; i++) {
2037     xd->plane[i].dst.buf = orig_dst[i];
2038     xd->plane[i].dst.stride = orig_dst_stride[i];
2039   }
2040 }
2041
2042 static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
2043                                     BLOCK_SIZE bsize, int *rate2,
2044                                     int64_t *distortion, int64_t *distortion_uv,
2045                                     int *disable_skip) {
2046   VP9_COMMON *cm = &cpi->common;
2047   MACROBLOCKD *xd = &x->e_mbd;
2048   const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2049   const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2050   unsigned int var, sse;
2051   // Skipping threshold for ac.
2052   unsigned int thresh_ac;
2053   // Skipping threshold for dc
2054   unsigned int thresh_dc;
2055
2056   var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2057                                xd->plane[0].dst.buf,
2058                                xd->plane[0].dst.stride, &sse);
2059
2060   if (x->encode_breakout > 0) {
2061     // Set a maximum for threshold to avoid big PSNR loss in low bitrate
2062     // case. Use extreme low threshold for static frames to limit skipping.
2063     const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2064                                      ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2065     // The encode_breakout input
2066     const unsigned int min_thresh =
2067         MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2068
2069     // Calculate threshold according to dequant value.
2070     thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2071     thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2072
2073     // Adjust threshold according to partition size.
2074     thresh_ac >>= 8 - (b_width_log2(bsize) +
2075         b_height_log2(bsize));
2076     thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2077   } else {
2078     thresh_ac = 0;
2079     thresh_dc = 0;
2080   }
2081
2082   // Y skipping condition checking
2083   if (sse < thresh_ac || sse == 0) {
2084     // dc skipping checking
2085     if ((sse - var) < thresh_dc || sse == var) {
2086       unsigned int sse_u, sse_v;
2087       unsigned int var_u, var_v;
2088
2089       var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2090                                       x->plane[1].src.stride,
2091                                       xd->plane[1].dst.buf,
2092                                       xd->plane[1].dst.stride, &sse_u);
2093
2094       // U skipping condition checking
2095       if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2096           (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2097         var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2098                                         x->plane[2].src.stride,
2099                                         xd->plane[2].dst.buf,
2100                                         xd->plane[2].dst.stride, &sse_v);
2101
2102         // V skipping condition checking
2103         if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2104             (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2105           x->skip = 1;
2106
2107           // The cost of skip bit needs to be added.
2108           *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2109
2110           // Scaling factor for SSE from spatial domain to frequency domain
2111           // is 16. Adjust distortion accordingly.
2112           *distortion_uv = (sse_u + sse_v) << 4;
2113           *distortion = (sse << 4) + *distortion_uv;
2114
2115           *disable_skip = 1;
2116         }
2117       }
2118     }
2119   }
2120 }
2121
2122 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2123                                  BLOCK_SIZE bsize,
2124                                  int64_t txfm_cache[],
2125                                  int *rate2, int64_t *distortion,
2126                                  int *skippable,
2127                                  int *rate_y, int64_t *distortion_y,
2128                                  int *rate_uv, int64_t *distortion_uv,
2129                                  int *disable_skip,
2130                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
2131                                  int mi_row, int mi_col,
2132                                  int_mv single_newmv[MAX_REF_FRAMES],
2133                                  INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
2134                                  int (*single_skippable)[MAX_REF_FRAMES],
2135                                  int64_t *psse,
2136                                  const int64_t ref_best_rd) {
2137   VP9_COMMON *cm = &cpi->common;
2138   RD_OPT *rd_opt = &cpi->rd;
2139   MACROBLOCKD *xd = &x->e_mbd;
2140   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
2141   const int is_comp_pred = has_second_ref(mbmi);
2142   const int this_mode = mbmi->mode;
2143   int_mv *frame_mv = mode_mv[this_mode];
2144   int i;
2145   int refs[2] = { mbmi->ref_frame[0],
2146     (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2147   int_mv cur_mv[2];
2148   int64_t this_rd = 0;
2149   DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2150   int pred_exists = 0;
2151   int intpel_mv;
2152   int64_t rd, tmp_rd, best_rd = INT64_MAX;
2153   int best_needs_copy = 0;
2154   uint8_t *orig_dst[MAX_MB_PLANE];
2155   int orig_dst_stride[MAX_MB_PLANE];
2156   int rs = 0;
2157   INTERP_FILTER best_filter = SWITCHABLE;
2158   uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
2159   int64_t bsse[MAX_MB_PLANE << 2] = {0};
2160
2161   int bsl = mi_width_log2_lookup[bsize];
2162   int pred_filter_search = cpi->sf.cb_pred_filter_search ?
2163       (((mi_row + mi_col) >> bsl) +
2164        get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
2165
2166   if (pred_filter_search) {
2167     INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
2168     if (xd->up_available)
2169       af = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
2170     if (xd->left_available)
2171       lf = xd->mi[-1].src_mi->mbmi.interp_filter;
2172
2173     if ((this_mode != NEWMV) || (af == lf))
2174       best_filter = af;
2175   }
2176
2177   if (is_comp_pred) {
2178     if (frame_mv[refs[0]].as_int == INVALID_MV ||
2179         frame_mv[refs[1]].as_int == INVALID_MV)
2180       return INT64_MAX;
2181
2182     if (cpi->sf.adaptive_mode_search) {
2183       if (single_filter[this_mode][refs[0]] ==
2184           single_filter[this_mode][refs[1]])
2185         best_filter = single_filter[this_mode][refs[0]];
2186     }
2187   }
2188
2189   if (this_mode == NEWMV) {
2190     int rate_mv;
2191     if (is_comp_pred) {
2192       // Initialize mv using single prediction mode result.
2193       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2194       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2195
2196       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2197         joint_motion_search(cpi, x, bsize, frame_mv,
2198                             mi_row, mi_col, single_newmv, &rate_mv);
2199       } else {
2200         rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2201                                    &mbmi->ref_mvs[refs[0]][0].as_mv,
2202                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2203         rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2204                                    &mbmi->ref_mvs[refs[1]][0].as_mv,
2205                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2206       }
2207       *rate2 += rate_mv;
2208     } else {
2209       int_mv tmp_mv;
2210       single_motion_search(cpi, x, bsize, mi_row, mi_col,
2211                            &tmp_mv, &rate_mv);
2212       if (tmp_mv.as_int == INVALID_MV)
2213         return INT64_MAX;
2214       *rate2 += rate_mv;
2215       frame_mv[refs[0]].as_int =
2216           xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2217       single_newmv[refs[0]].as_int = tmp_mv.as_int;
2218     }
2219   }
2220
2221   for (i = 0; i < is_comp_pred + 1; ++i) {
2222     cur_mv[i] = frame_mv[refs[i]];
2223     // Clip "next_nearest" so that it does not extend to far out of image
2224     if (this_mode != NEWMV)
2225       clamp_mv2(&cur_mv[i].as_mv, xd);
2226
2227     if (mv_check_bounds(x, &cur_mv[i].as_mv))
2228       return INT64_MAX;
2229     mbmi->mv[i].as_int = cur_mv[i].as_int;
2230   }
2231
2232   // do first prediction into the destination buffer. Do the next
2233   // prediction into a temporary buffer. Then keep track of which one
2234   // of these currently holds the best predictor, and use the other
2235   // one for future predictions. In the end, copy from tmp_buf to
2236   // dst if necessary.
2237   for (i = 0; i < MAX_MB_PLANE; i++) {
2238     orig_dst[i] = xd->plane[i].dst.buf;
2239     orig_dst_stride[i] = xd->plane[i].dst.stride;
2240   }
2241
2242   /* We don't include the cost of the second reference here, because there
2243    * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2244    * words if you present them in that order, the second one is always known
2245    * if the first is known */
2246   *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2247
2248   if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
2249       mbmi->mode != NEARESTMV)
2250     return INT64_MAX;
2251
2252   pred_exists = 0;
2253   // Are all MVs integer pel for Y and UV
2254   intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2255   if (is_comp_pred)
2256     intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2257
2258   // Search for best switchable filter by checking the variance of
2259   // pred error irrespective of whether the filter will be used
2260   rd_opt->mask_filter = 0;
2261   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2262     rd_opt->filter_cache[i] = INT64_MAX;
2263
2264   if (cm->interp_filter != BILINEAR) {
2265     if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2266       best_filter = EIGHTTAP;
2267     } else if (best_filter == SWITCHABLE) {
2268       int newbest;
2269       int tmp_rate_sum = 0;
2270       int64_t tmp_dist_sum = 0;
2271
2272       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2273         int j;
2274         int64_t rs_rd;
2275         mbmi->interp_filter = i;
2276         rs = vp9_get_switchable_rate(cpi);
2277         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2278
2279         if (i > 0 && intpel_mv) {
2280           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2281           rd_opt->filter_cache[i] = rd;
2282           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2283               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2284           if (cm->interp_filter == SWITCHABLE)
2285             rd += rs_rd;
2286           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2287         } else {
2288           int rate_sum = 0;
2289           int64_t dist_sum = 0;
2290           if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
2291               (cpi->sf.interp_filter_search_mask & (1 << i))) {
2292             rate_sum = INT_MAX;
2293             dist_sum = INT64_MAX;
2294             continue;
2295           }
2296
2297           if ((cm->interp_filter == SWITCHABLE &&
2298                (!i || best_needs_copy)) ||
2299               (cm->interp_filter != SWITCHABLE &&
2300                (cm->interp_filter == mbmi->interp_filter ||
2301                 (i == 0 && intpel_mv)))) {
2302             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2303           } else {
2304             for (j = 0; j < MAX_MB_PLANE; j++) {
2305               xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2306               xd->plane[j].dst.stride = 64;
2307             }
2308           }
2309           vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2310           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2311
2312           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2313           rd_opt->filter_cache[i] = rd;
2314           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2315               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2316           if (cm->interp_filter == SWITCHABLE)
2317             rd += rs_rd;
2318           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2319
2320           if (i == 0 && intpel_mv) {
2321             tmp_rate_sum = rate_sum;
2322             tmp_dist_sum = dist_sum;
2323           }
2324         }
2325
2326         if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2327           if (rd / 2 > ref_best_rd) {
2328             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2329             return INT64_MAX;
2330           }
2331         }
2332         newbest = i == 0 || rd < best_rd;
2333
2334         if (newbest) {
2335           best_rd = rd;
2336           best_filter = mbmi->interp_filter;
2337           if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2338             best_needs_copy = !best_needs_copy;
2339           vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2340           vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2341         }
2342
2343         if ((cm->interp_filter == SWITCHABLE && newbest) ||
2344             (cm->interp_filter != SWITCHABLE &&
2345              cm->interp_filter == mbmi->interp_filter)) {
2346           pred_exists = 1;
2347           tmp_rd = best_rd;
2348         }
2349       }
2350       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2351     }
2352   }
2353   // Set the appropriate filter
2354   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2355       cm->interp_filter : best_filter;
2356   rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
2357
2358   if (pred_exists) {
2359     if (best_needs_copy) {
2360       // again temporarily set the buffers to local memory to prevent a memcpy
2361       for (i = 0; i < MAX_MB_PLANE; i++) {
2362         xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2363         xd->plane[i].dst.stride = 64;
2364       }
2365     }
2366     rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
2367   } else {
2368     int tmp_rate;
2369     int64_t tmp_dist;
2370     // Handles the special case when a filter that is not in the
2371     // switchable list (ex. bilinear) is indicated at the frame level, or
2372     // skip condition holds.
2373     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2374     model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2375     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2376     vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2377     vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2378   }
2379
2380   if (!is_comp_pred)
2381     single_filter[this_mode][refs[0]] = mbmi->interp_filter;
2382
2383   if (cpi->sf.adaptive_mode_search)
2384     if (is_comp_pred)
2385       if (single_skippable[this_mode][refs[0]] &&
2386           single_skippable[this_mode][refs[1]])
2387         vpx_memset(skip_txfm, 1, sizeof(skip_txfm));
2388
2389   if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2390     // if current pred_error modeled rd is substantially more than the best
2391     // so far, do not bother doing full rd
2392     if (rd / 2 > ref_best_rd) {
2393       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2394       return INT64_MAX;
2395     }
2396   }
2397
2398   if (cm->interp_filter == SWITCHABLE)
2399     *rate2 += rs;
2400
2401   if (!is_comp_pred) {
2402     if (cpi->allow_encode_breakout)
2403       rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv,
2404                               disable_skip);
2405   }
2406
2407   vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
2408   vpx_memcpy(x->bsse, bsse, sizeof(bsse));
2409
2410   if (!x->skip) {
2411     int skippable_y, skippable_uv;
2412     int64_t sseuv = INT64_MAX;
2413     int64_t rdcosty = INT64_MAX;
2414
2415     // Y cost and distortion
2416     vp9_subtract_plane(x, bsize, 0);
2417     super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2418                     bsize, txfm_cache, ref_best_rd);
2419
2420     if (*rate_y == INT_MAX) {
2421       *rate2 = INT_MAX;
2422       *distortion = INT64_MAX;
2423       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2424       return INT64_MAX;
2425     }
2426
2427     *rate2 += *rate_y;
2428     *distortion += *distortion_y;
2429
2430     rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2431     rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2432
2433     super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2434                      bsize, ref_best_rd - rdcosty);
2435     if (*rate_uv == INT_MAX) {
2436       *rate2 = INT_MAX;
2437       *distortion = INT64_MAX;
2438       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2439       return INT64_MAX;
2440     }
2441
2442     *psse += sseuv;
2443     *rate2 += *rate_uv;
2444     *distortion += *distortion_uv;
2445     *skippable = skippable_y && skippable_uv;
2446   }
2447
2448   if (!is_comp_pred)
2449     single_skippable[this_mode][refs[0]] = *skippable;
2450
2451   restore_dst_buf(xd, orig_dst, orig_dst_stride);
2452   return this_rd;  // if 0, this will be re-calculated by caller
2453 }
2454
2455 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2456                                int *returnrate, int64_t *returndist,
2457                                BLOCK_SIZE bsize,
2458                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2459   VP9_COMMON *const cm = &cpi->common;
2460   MACROBLOCKD *const xd = &x->e_mbd;
2461   struct macroblockd_plane *const pd = xd->plane;
2462   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2463   int y_skip = 0, uv_skip = 0;
2464   int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
2465   TX_SIZE max_uv_tx_size;
2466   x->skip_encode = 0;
2467   ctx->skip = 0;
2468   xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME;
2469
2470   if (bsize >= BLOCK_8X8) {
2471     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2472                                &dist_y, &y_skip, bsize, tx_cache,
2473                                best_rd) >= best_rd) {
2474       *returnrate = INT_MAX;
2475       return;
2476     }
2477   } else {
2478     y_skip = 0;
2479     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2480                                      &dist_y, best_rd) >= best_rd) {
2481       *returnrate = INT_MAX;
2482       return;
2483     }
2484   }
2485   max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
2486                                        pd[1].subsampling_x,
2487                                        pd[1].subsampling_y);
2488   rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2489                           &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),
2490                           max_uv_tx_size);
2491
2492   if (y_skip && uv_skip) {
2493     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2494                   vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2495     *returndist = dist_y + dist_uv;
2496     vp9_zero(ctx->tx_rd_diff);
2497   } else {
2498     int i;
2499     *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2500     *returndist = dist_y + dist_uv;
2501     if (cpi->sf.tx_size_search_method == USE_FULL_RD)
2502       for (i = 0; i < TX_MODES; i++) {
2503         if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
2504           ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
2505         else
2506           ctx->tx_rd_diff[i] = 0;
2507       }
2508   }
2509
2510   ctx->mic = *xd->mi[0].src_mi;
2511 }
2512
2513 // Updating rd_thresh_freq_fact[] here means that the different
2514 // partition/block sizes are handled independently based on the best
2515 // choice for the current partition. It may well be better to keep a scaled
2516 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
2517 // combination that wins out.
2518 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
2519                                   int best_mode_index) {
2520   if (cpi->sf.adaptive_rd_thresh > 0) {
2521     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
2522     int mode;
2523     for (mode = 0; mode < top_mode; ++mode) {
2524       int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
2525
2526       if (mode == best_mode_index) {
2527         *fact -= (*fact >> 3);
2528       } else {
2529         *fact = MIN(*fact + RD_THRESH_INC,
2530                     cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
2531       }
2532     }
2533   }
2534 }
2535
2536 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2537                                   const TileInfo *const tile,
2538                                   int mi_row, int mi_col,
2539                                   int *returnrate,
2540                                   int64_t *returndistortion,
2541                                   BLOCK_SIZE bsize,
2542                                   PICK_MODE_CONTEXT *ctx,
2543                                   int64_t best_rd_so_far) {
2544   VP9_COMMON *const cm = &cpi->common;
2545   RD_OPT *const rd_opt = &cpi->rd;
2546   MACROBLOCKD *const xd = &x->e_mbd;
2547   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
2548   const struct segmentation *const seg = &cm->seg;
2549   struct macroblockd_plane *const pd = xd->plane;
2550   PREDICTION_MODE this_mode;
2551   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
2552   unsigned char segment_id = mbmi->segment_id;
2553   int comp_pred, i, k;
2554   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2555   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
2556   int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
2557   INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
2558   int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
2559   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
2560                                     VP9_ALT_FLAG };
2561   int64_t best_rd = best_rd_so_far;
2562   int64_t best_tx_rd[TX_MODES];
2563   int64_t best_tx_diff[TX_MODES];
2564   int64_t best_pred_diff[REFERENCE_MODES];
2565   int64_t best_pred_rd[REFERENCE_MODES];
2566   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
2567   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
2568   MB_MODE_INFO best_mbmode;
2569   int best_mode_skippable = 0;
2570   int midx, best_mode_index = -1;
2571   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
2572   vp9_prob comp_mode_p;
2573   int64_t best_intra_rd = INT64_MAX;
2574   unsigned int best_pred_sse = UINT_MAX;
2575   PREDICTION_MODE best_intra_mode = DC_PRED;
2576   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
2577   int64_t dist_uv[TX_SIZES];
2578   int skip_uv[TX_SIZES];
2579   PREDICTION_MODE mode_uv[TX_SIZES];
2580   const int intra_cost_penalty =
2581       20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
2582   int best_skip2 = 0;
2583   uint8_t ref_frame_skip_mask[2] = { 0 };
2584   uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
2585   int mode_skip_start = cpi->sf.mode_skip_start + 1;
2586   const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
2587   const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
2588   int mode_threshold[MAX_MODES];
2589   int *mode_map = rd_opt->mode_map[bsize];
2590   const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
2591   vp9_zero(best_mbmode);
2592
2593   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
2594
2595   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
2596                            &comp_mode_p);
2597
2598   for (i = 0; i < REFERENCE_MODES; ++i)
2599     best_pred_rd[i] = INT64_MAX;
2600   for (i = 0; i < TX_MODES; i++)
2601     best_tx_rd[i] = INT64_MAX;
2602   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
2603     best_filter_rd[i] = INT64_MAX;
2604   for (i = 0; i < TX_SIZES; i++)
2605     rate_uv_intra[i] = INT_MAX;
2606   for (i = 0; i < MAX_REF_FRAMES; ++i)
2607     x->pred_sse[i] = INT_MAX;
2608   for (i = 0; i < MB_MODE_COUNT; ++i) {
2609     for (k = 0; k < MAX_REF_FRAMES; ++k) {
2610       single_inter_filter[i][k] = SWITCHABLE;
2611       single_skippable[i][k] = 0;
2612     }
2613   }
2614
2615   *returnrate = INT_MAX;
2616
2617   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2618     x->pred_mv_sad[ref_frame] = INT_MAX;
2619     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
2620       setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col,
2621                          frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
2622     }
2623     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
2624     frame_mv[ZEROMV][ref_frame].as_int = 0;
2625   }
2626
2627   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2628     if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
2629       // Skip checking missing references in both single and compound reference
2630       // modes. Note that a mode will be skipped iff both reference frames
2631       // are masked out.
2632       ref_frame_skip_mask[0] |= (1 << ref_frame);
2633       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2634     } else if (cpi->sf.reference_masking) {
2635       for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
2636         // Skip fixed mv modes for poor references
2637         if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
2638           mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
2639           break;
2640         }
2641       }
2642     }
2643     // If the segment reference frame feature is enabled....
2644     // then do nothing if the current ref frame is not allowed..
2645     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
2646         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
2647       ref_frame_skip_mask[0] |= (1 << ref_frame);
2648       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2649     }
2650   }
2651
2652   // Disable this drop out case if the ref frame
2653   // segment level feature is enabled for this segment. This is to
2654   // prevent the possibility that we end up unable to pick any mode.
2655   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
2656     // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
2657     // unless ARNR filtering is enabled in which case we want
2658     // an unfiltered alternative. We allow near/nearest as well
2659     // because they may result in zero-zero MVs but be cheaper.
2660     if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
2661       ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
2662       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2663       mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
2664       if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
2665         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
2666       if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
2667         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
2668     }
2669   }
2670
2671   if (cpi->rc.is_src_frame_alt_ref) {
2672     if (cpi->sf.alt_ref_search_fp) {
2673       mode_skip_mask[ALTREF_FRAME] = 0;
2674       ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
2675       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2676     }
2677   }
2678
2679   if (bsize > cpi->sf.max_intra_bsize) {
2680     ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
2681     ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
2682   }
2683
2684   mode_skip_mask[INTRA_FRAME] |=
2685       ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
2686
2687   for (i = 0; i < MAX_MODES; ++i)
2688     mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
2689
2690   midx =  cpi->sf.schedule_mode_search ? mode_skip_start : 0;
2691   while (midx > 4) {
2692     uint8_t end_pos = 0;
2693     for (i = 5; i < midx; ++i) {
2694       if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
2695         uint8_t tmp = mode_map[i];
2696         mode_map[i] = mode_map[i - 1];
2697         mode_map[i - 1] = tmp;
2698         end_pos = i;
2699       }
2700     }
2701     midx = end_pos;
2702   }
2703
2704   for (midx = 0; midx < MAX_MODES; ++midx) {
2705     int mode_index = mode_map[midx];
2706     int mode_excluded = 0;
2707     int64_t this_rd = INT64_MAX;
2708     int disable_skip = 0;
2709     int compmode_cost = 0;
2710     int rate2 = 0, rate_y = 0, rate_uv = 0;
2711     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
2712     int skippable = 0;
2713     int64_t tx_cache[TX_MODES];
2714     int this_skip2 = 0;
2715     int64_t total_sse = INT64_MAX;
2716     int early_term = 0;
2717
2718     this_mode = vp9_mode_order[mode_index].mode;
2719     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
2720     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
2721
2722     // Look at the reference frame of the best mode so far and set the
2723     // skip mask to look at a subset of the remaining modes.
2724     if (midx == mode_skip_start && best_mode_index >= 0) {
2725       switch (best_mbmode.ref_frame[0]) {
2726         case INTRA_FRAME:
2727           break;
2728         case LAST_FRAME:
2729           ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
2730           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2731           break;
2732         case GOLDEN_FRAME:
2733           ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
2734           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2735           break;
2736         case ALTREF_FRAME:
2737           ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
2738           break;
2739         case NONE:
2740         case MAX_REF_FRAMES:
2741           assert(0 && "Invalid Reference frame");
2742           break;
2743       }
2744     }
2745
2746     if (ref_frame_skip_mask[0] & (1 << ref_frame) &&
2747         ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))
2748       continue;
2749
2750     if (mode_skip_mask[ref_frame] & (1 << this_mode))
2751       continue;
2752
2753     // Test best rd so far against threshold for trying this mode.
2754     if (best_mode_skippable && cpi->sf.schedule_mode_search)
2755       mode_threshold[mode_index] <<= 1;
2756
2757     if (best_rd < mode_threshold[mode_index])
2758       continue;
2759
2760     if (cpi->sf.motion_field_mode_search) {
2761       const int mi_width  = MIN(num_8x8_blocks_wide_lookup[bsize],
2762                                 tile->mi_col_end - mi_col);
2763       const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
2764                                 tile->mi_row_end - mi_row);
2765       const int bsl = mi_width_log2(bsize);
2766       int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
2767           + get_chessboard_index(cm->current_video_frame)) & 0x1;
2768       MB_MODE_INFO *ref_mbmi;
2769       int const_motion = 1;
2770       int skip_ref_frame = !cb_partition_search_ctrl;
2771       MV_REFERENCE_FRAME rf = NONE;
2772       int_mv ref_mv;
2773       ref_mv.as_int = INVALID_MV;
2774
2775       if ((mi_row - 1) >= tile->mi_row_start) {
2776         ref_mv = xd->mi[-xd->mi_stride].src_mi->mbmi.mv[0];
2777         rf = xd->mi[-xd->mi_stride].src_mi->mbmi.ref_frame[0];
2778         for (i = 0; i < mi_width; ++i) {
2779           ref_mbmi = &xd->mi[-xd->mi_stride + i].src_mi->mbmi;
2780           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
2781                           (ref_frame == ref_mbmi->ref_frame[0]);
2782           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
2783         }
2784       }
2785
2786       if ((mi_col - 1) >= tile->mi_col_start) {
2787         if (ref_mv.as_int == INVALID_MV)
2788           ref_mv = xd->mi[-1].src_mi->mbmi.mv[0];
2789         if (rf == NONE)
2790           rf = xd->mi[-1].src_mi->mbmi.ref_frame[0];
2791         for (i = 0; i < mi_height; ++i) {
2792           ref_mbmi = &xd->mi[i * xd->mi_stride - 1].src_mi->mbmi;
2793           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
2794                           (ref_frame == ref_mbmi->ref_frame[0]);
2795           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
2796         }
2797       }
2798
2799       if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
2800         if (rf > INTRA_FRAME)
2801           if (ref_frame != rf)
2802             continue;
2803
2804       if (const_motion)
2805         if (this_mode == NEARMV || this_mode == ZEROMV)
2806           continue;
2807     }
2808
2809     comp_pred = second_ref_frame > INTRA_FRAME;
2810     if (comp_pred) {
2811       if (!cm->allow_comp_inter_inter)
2812         continue;
2813
2814       // Skip compound inter modes if ARF is not available.
2815       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
2816         continue;
2817
2818       // Do not allow compound prediction if the segment level reference frame
2819       // feature is in use as in this case there can only be one reference.
2820       if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
2821         continue;
2822
2823       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
2824           best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
2825         continue;
2826
2827       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
2828     } else {
2829       if (ref_frame != INTRA_FRAME)
2830         mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
2831     }
2832
2833     if (ref_frame == INTRA_FRAME) {
2834       if (cpi->sf.adaptive_mode_search)
2835         if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
2836           continue;
2837
2838       if (this_mode != DC_PRED) {
2839         // Disable intra modes other than DC_PRED for blocks with low variance
2840         // Threshold for intra skipping based on source variance
2841         // TODO(debargha): Specialize the threshold for super block sizes
2842         const unsigned int skip_intra_var_thresh = 64;
2843         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
2844             x->source_variance < skip_intra_var_thresh)
2845           continue;
2846         // Only search the oblique modes if the best so far is
2847         // one of the neighboring directional modes
2848         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
2849             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
2850           if (best_mode_index >= 0 &&
2851               best_mbmode.ref_frame[0] > INTRA_FRAME)
2852             continue;
2853         }
2854         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
2855           if (conditional_skipintra(this_mode, best_intra_mode))
2856               continue;
2857         }
2858       }
2859     } else {
2860       const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
2861       if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
2862                               this_mode, ref_frames))
2863         continue;
2864     }
2865
2866     mbmi->mode = this_mode;
2867     mbmi->uv_mode = DC_PRED;
2868     mbmi->ref_frame[0] = ref_frame;
2869     mbmi->ref_frame[1] = second_ref_frame;
2870     // Evaluate all sub-pel filters irrespective of whether we can use
2871     // them for this frame.
2872     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
2873                                                           : cm->interp_filter;
2874     mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
2875
2876     x->skip = 0;
2877     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
2878
2879     // Select prediction reference frames.
2880     for (i = 0; i < MAX_MB_PLANE; i++) {
2881       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
2882       if (comp_pred)
2883         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
2884     }
2885
2886     for (i = 0; i < TX_MODES; ++i)
2887       tx_cache[i] = INT64_MAX;
2888
2889     if (ref_frame == INTRA_FRAME) {
2890       TX_SIZE uv_tx;
2891       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
2892                       NULL, bsize, tx_cache, best_rd);
2893
2894       if (rate_y == INT_MAX)
2895         continue;
2896
2897       uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x,
2898                                   pd[1].subsampling_y);
2899       if (rate_uv_intra[uv_tx] == INT_MAX) {
2900         choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
2901                              &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
2902                              &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
2903       }
2904
2905       rate_uv = rate_uv_tokenonly[uv_tx];
2906       distortion_uv = dist_uv[uv_tx];
2907       skippable = skippable && skip_uv[uv_tx];
2908       mbmi->uv_mode = mode_uv[uv_tx];
2909
2910       rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
2911       if (this_mode != DC_PRED && this_mode != TM_PRED)
2912         rate2 += intra_cost_penalty;
2913       distortion2 = distortion_y + distortion_uv;
2914     } else {
2915       this_rd = handle_inter_mode(cpi, x, bsize,
2916                                   tx_cache,
2917                                   &rate2, &distortion2, &skippable,
2918                                   &rate_y, &distortion_y,
2919                                   &rate_uv, &distortion_uv,
2920                                   &disable_skip, frame_mv,
2921                                   mi_row, mi_col,
2922                                   single_newmv, single_inter_filter,
2923                                   single_skippable, &total_sse, best_rd);
2924       if (this_rd == INT64_MAX)
2925         continue;
2926
2927       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
2928
2929       if (cm->reference_mode == REFERENCE_MODE_SELECT)
2930         rate2 += compmode_cost;
2931     }
2932
2933     // Estimate the reference frame signaling cost and add it
2934     // to the rolling cost variable.
2935     if (comp_pred) {
2936       rate2 += ref_costs_comp[ref_frame];
2937     } else {
2938       rate2 += ref_costs_single[ref_frame];
2939     }
2940
2941     if (!disable_skip) {
2942       if (skippable) {
2943         // Back out the coefficient coding costs
2944         rate2 -= (rate_y + rate_uv);
2945
2946         // Cost the skip mb case
2947         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2948       } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
2949         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
2950             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
2951           // Add in the cost of the no skip flag.
2952           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2953         } else {
2954           // FIXME(rbultje) make this work for splitmv also
2955           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2956           distortion2 = total_sse;
2957           assert(total_sse >= 0);
2958           rate2 -= (rate_y + rate_uv);
2959           this_skip2 = 1;
2960         }
2961       } else {
2962         // Add in the cost of the no skip flag.
2963         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2964       }
2965
2966       // Calculate the final RD estimate for this mode.
2967       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2968     }
2969
2970     if (ref_frame == INTRA_FRAME) {
2971     // Keep record of best intra rd
2972       if (this_rd < best_intra_rd) {
2973         best_intra_rd = this_rd;
2974         best_intra_mode = mbmi->mode;
2975       }
2976     }
2977
2978     if (!disable_skip && ref_frame == INTRA_FRAME) {
2979       for (i = 0; i < REFERENCE_MODES; ++i)
2980         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
2981       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
2982         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
2983     }
2984
2985     // Did this mode help.. i.e. is it the new best mode
2986     if (this_rd < best_rd || x->skip) {
2987       int max_plane = MAX_MB_PLANE;
2988       if (!mode_excluded) {
2989         // Note index of best mode so far
2990         best_mode_index = mode_index;
2991
2992         if (ref_frame == INTRA_FRAME) {
2993           /* required for left and above block mv */
2994           mbmi->mv[0].as_int = 0;
2995           max_plane = 1;
2996         } else {
2997           best_pred_sse = x->pred_sse[ref_frame];
2998         }
2999
3000         *returnrate = rate2;
3001         *returndistortion = distortion2;
3002         best_rd = this_rd;
3003         best_mbmode = *mbmi;
3004         best_skip2 = this_skip2;
3005         best_mode_skippable = skippable;
3006
3007         if (!x->select_tx_size)
3008           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3009         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3010                    sizeof(uint8_t) * ctx->num_4x4_blk);
3011
3012         // TODO(debargha): enhance this test with a better distortion prediction
3013         // based on qp, activity mask and history
3014         if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3015             (mode_index > MIN_EARLY_TERM_INDEX)) {
3016           const int qstep = xd->plane[0].dequant[1];
3017           // TODO(debargha): Enhance this by specializing for each mode_index
3018           int scale = 4;
3019           if (x->source_variance < UINT_MAX) {
3020             const int var_adjust = (x->source_variance < 16);
3021             scale -= var_adjust;
3022           }
3023           if (ref_frame > INTRA_FRAME &&
3024               distortion2 * scale < qstep * qstep) {
3025             early_term = 1;
3026           }
3027         }
3028       }
3029     }
3030
3031     /* keep record of best compound/single-only prediction */
3032     if (!disable_skip && ref_frame != INTRA_FRAME) {
3033       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3034
3035       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3036         single_rate = rate2 - compmode_cost;
3037         hybrid_rate = rate2;
3038       } else {
3039         single_rate = rate2;
3040         hybrid_rate = rate2 + compmode_cost;
3041       }
3042
3043       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3044       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3045
3046       if (!comp_pred) {
3047         if (single_rd < best_pred_rd[SINGLE_REFERENCE])
3048           best_pred_rd[SINGLE_REFERENCE] = single_rd;
3049       } else {
3050         if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
3051           best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3052       }
3053       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3054         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3055
3056       /* keep record of best filter type */
3057       if (!mode_excluded && cm->interp_filter != BILINEAR) {
3058         int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3059                               SWITCHABLE_FILTERS : cm->interp_filter];
3060
3061         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3062           int64_t adj_rd;
3063           if (ref == INT64_MAX)
3064             adj_rd = 0;
3065           else if (rd_opt->filter_cache[i] == INT64_MAX)
3066             // when early termination is triggered, the encoder does not have
3067             // access to the rate-distortion cost. it only knows that the cost
3068             // should be above the maximum valid value. hence it takes the known
3069             // maximum plus an arbitrary constant as the rate-distortion cost.
3070             adj_rd = rd_opt->mask_filter - ref + 10;
3071           else
3072             adj_rd = rd_opt->filter_cache[i] - ref;
3073
3074           adj_rd += this_rd;
3075           best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3076         }
3077       }
3078     }
3079
3080     /* keep record of best txfm size */
3081     if (bsize < BLOCK_32X32) {
3082       if (bsize < BLOCK_16X16)
3083         tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3084
3085       tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3086     }
3087     if (!mode_excluded && this_rd != INT64_MAX) {
3088       for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3089         int64_t adj_rd = INT64_MAX;
3090         adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3091
3092         if (adj_rd < best_tx_rd[i])
3093           best_tx_rd[i] = adj_rd;
3094       }
3095     }
3096
3097     if (early_term)
3098       break;
3099
3100     if (x->skip && !comp_pred)
3101       break;
3102   }
3103
3104   // The inter modes' rate costs are not calculated precisely in some cases.
3105   // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
3106   // ZEROMV. Here, checks are added for those cases, and the mode decisions
3107   // are corrected.
3108   if (best_mbmode.mode == NEWMV) {
3109     const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
3110         best_mbmode.ref_frame[1]};
3111     int comp_pred_mode = refs[1] > INTRA_FRAME;
3112
3113     if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3114         ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
3115             best_mbmode.mv[1].as_int) || !comp_pred_mode))
3116       best_mbmode.mode = NEARESTMV;
3117     else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3118         ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
3119             best_mbmode.mv[1].as_int) || !comp_pred_mode))
3120       best_mbmode.mode = NEARMV;
3121     else if (best_mbmode.mv[0].as_int == 0 &&
3122         ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
3123       best_mbmode.mode = ZEROMV;
3124   }
3125
3126   if (best_mode_index < 0 || best_rd >= best_rd_so_far)
3127     return INT64_MAX;
3128
3129   // If we used an estimate for the uv intra rd in the loop above...
3130   if (cpi->sf.use_uv_intra_rd_estimate) {
3131     // Do Intra UV best rd mode selection if best mode choice above was intra.
3132     if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3133       TX_SIZE uv_tx_size;
3134       *mbmi = best_mbmode;
3135       uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
3136       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3137                               &rate_uv_tokenonly[uv_tx_size],
3138                               &dist_uv[uv_tx_size],
3139                               &skip_uv[uv_tx_size],
3140                               bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3141                               uv_tx_size);
3142     }
3143   }
3144
3145   assert((cm->interp_filter == SWITCHABLE) ||
3146          (cm->interp_filter == best_mbmode.interp_filter) ||
3147          !is_inter_block(&best_mbmode));
3148
3149   if (!cpi->rc.is_src_frame_alt_ref)
3150     update_rd_thresh_fact(cpi, bsize, best_mode_index);
3151
3152   // macroblock modes
3153   *mbmi = best_mbmode;
3154   x->skip |= best_skip2;
3155
3156   for (i = 0; i < REFERENCE_MODES; ++i) {
3157     if (best_pred_rd[i] == INT64_MAX)
3158       best_pred_diff[i] = INT_MIN;
3159     else
3160       best_pred_diff[i] = best_rd - best_pred_rd[i];
3161   }
3162
3163   if (!x->skip) {
3164     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3165       if (best_filter_rd[i] == INT64_MAX)
3166         best_filter_diff[i] = 0;
3167       else
3168         best_filter_diff[i] = best_rd - best_filter_rd[i];
3169     }
3170     if (cm->interp_filter == SWITCHABLE)
3171       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3172     for (i = 0; i < TX_MODES; i++) {
3173       if (best_tx_rd[i] == INT64_MAX)
3174         best_tx_diff[i] = 0;
3175       else
3176         best_tx_diff[i] = best_rd - best_tx_rd[i];
3177     }
3178   } else {
3179     vp9_zero(best_filter_diff);
3180     vp9_zero(best_tx_diff);
3181   }
3182
3183   // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
3184   // updating code causes PSNR loss. Need to figure out the confliction.
3185   x->skip |= best_mode_skippable;
3186
3187   store_coding_context(x, ctx, best_mode_index, best_pred_diff,
3188                        best_tx_diff, best_filter_diff, best_mode_skippable);
3189
3190   return best_rd;
3191 }
3192
3193 int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
3194                                            int *returnrate,
3195                                            int64_t *returndistortion,
3196                                            BLOCK_SIZE bsize,
3197                                            PICK_MODE_CONTEXT *ctx,
3198                                            int64_t best_rd_so_far) {
3199   VP9_COMMON *const cm = &cpi->common;
3200   RD_OPT *const rd_opt = &cpi->rd;
3201   MACROBLOCKD *const xd = &x->e_mbd;
3202   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
3203   unsigned char segment_id = mbmi->segment_id;
3204   const int comp_pred = 0;
3205   int i;
3206   int64_t best_tx_diff[TX_MODES];
3207   int64_t best_pred_diff[REFERENCE_MODES];
3208   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3209   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3210   vp9_prob comp_mode_p;
3211   INTERP_FILTER best_filter = SWITCHABLE;
3212   int64_t this_rd = INT64_MAX;
3213   int rate2 = 0;
3214   const int64_t distortion2 = 0;
3215
3216   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3217
3218   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3219                            &comp_mode_p);
3220
3221   for (i = 0; i < MAX_REF_FRAMES; ++i)
3222     x->pred_sse[i] = INT_MAX;
3223   for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
3224     x->pred_mv_sad[i] = INT_MAX;
3225
3226   *returnrate = INT_MAX;
3227
3228   assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
3229
3230   mbmi->mode = ZEROMV;
3231   mbmi->uv_mode = DC_PRED;
3232   mbmi->ref_frame[0] = LAST_FRAME;
3233   mbmi->ref_frame[1] = NONE;
3234   mbmi->mv[0].as_int = 0;
3235   x->skip = 1;
3236
3237   // Search for best switchable filter by checking the variance of
3238   // pred error irrespective of whether the filter will be used
3239   rd_opt->mask_filter = 0;
3240   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3241     rd_opt->filter_cache[i] = INT64_MAX;
3242
3243   if (cm->interp_filter != BILINEAR) {
3244     best_filter = EIGHTTAP;
3245     if (cm->interp_filter == SWITCHABLE &&
3246         x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
3247       int rs;
3248       int best_rs = INT_MAX;
3249       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
3250         mbmi->interp_filter = i;
3251         rs = vp9_get_switchable_rate(cpi);
3252         if (rs < best_rs) {
3253           best_rs = rs;
3254           best_filter = mbmi->interp_filter;
3255         }
3256       }
3257     }
3258   }
3259   // Set the appropriate filter
3260   if (cm->interp_filter == SWITCHABLE) {
3261     mbmi->interp_filter = best_filter;
3262     rate2 += vp9_get_switchable_rate(cpi);
3263   } else {
3264     mbmi->interp_filter = cm->interp_filter;
3265   }
3266
3267   if (cm->reference_mode == REFERENCE_MODE_SELECT)
3268     rate2 += vp9_cost_bit(comp_mode_p, comp_pred);
3269
3270   // Estimate the reference frame signaling cost and add it
3271   // to the rolling cost variable.
3272   rate2 += ref_costs_single[LAST_FRAME];
3273   this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3274
3275   *returnrate = rate2;
3276   *returndistortion = distortion2;
3277
3278   if (this_rd >= best_rd_so_far)
3279     return INT64_MAX;
3280
3281   assert((cm->interp_filter == SWITCHABLE) ||
3282          (cm->interp_filter == mbmi->interp_filter));
3283
3284   update_rd_thresh_fact(cpi, bsize, THR_ZEROMV);
3285
3286   vp9_zero(best_pred_diff);
3287   vp9_zero(best_filter_diff);
3288   vp9_zero(best_tx_diff);
3289
3290   if (!x->select_tx_size)
3291     swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
3292   store_coding_context(x, ctx, THR_ZEROMV,
3293                        best_pred_diff, best_tx_diff, best_filter_diff, 0);
3294
3295   return this_rd;
3296 }
3297
3298 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3299                                       const TileInfo *const tile,
3300                                       int mi_row, int mi_col,
3301                                       int *returnrate,
3302                                       int64_t *returndistortion,
3303                                       BLOCK_SIZE bsize,
3304                                       PICK_MODE_CONTEXT *ctx,
3305                                       int64_t best_rd_so_far) {
3306   VP9_COMMON *const cm = &cpi->common;
3307   RD_OPT *const rd_opt = &cpi->rd;
3308   MACROBLOCKD *const xd = &x->e_mbd;
3309   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
3310   const struct segmentation *const seg = &cm->seg;
3311   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3312   unsigned char segment_id = mbmi->segment_id;
3313   int comp_pred, i;
3314   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3315   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3316   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3317                                     VP9_ALT_FLAG };
3318   int64_t best_rd = best_rd_so_far;
3319   int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
3320   static const int64_t best_tx_diff[TX_MODES] = { 0 };
3321   int64_t best_pred_diff[REFERENCE_MODES];
3322   int64_t best_pred_rd[REFERENCE_MODES];
3323   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3324   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3325   MB_MODE_INFO best_mbmode;
3326   int ref_index, best_ref_index = 0;
3327   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3328   vp9_prob comp_mode_p;
3329   INTERP_FILTER tmp_best_filter = SWITCHABLE;
3330   int rate_uv_intra, rate_uv_tokenonly;
3331   int64_t dist_uv;
3332   int skip_uv;
3333   PREDICTION_MODE mode_uv = DC_PRED;
3334   const int intra_cost_penalty =
3335       20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3336   int_mv seg_mvs[4][MAX_REF_FRAMES];
3337   b_mode_info best_bmodes[4];
3338   int best_skip2 = 0;
3339   int ref_frame_skip_mask[2] = { 0 };
3340
3341   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3342   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3343   vp9_zero(best_mbmode);
3344
3345   for (i = 0; i < 4; i++) {
3346     int j;
3347     for (j = 0; j < MAX_REF_FRAMES; j++)
3348       seg_mvs[i][j].as_int = INVALID_MV;
3349   }
3350
3351   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3352                            &comp_mode_p);
3353
3354   for (i = 0; i < REFERENCE_MODES; ++i)
3355     best_pred_rd[i] = INT64_MAX;
3356   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3357     best_filter_rd[i] = INT64_MAX;
3358   rate_uv_intra = INT_MAX;
3359
3360   *returnrate = INT_MAX;
3361
3362   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3363     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3364       setup_buffer_inter(cpi, x, tile,
3365                              ref_frame, bsize, mi_row, mi_col,
3366                              frame_mv[NEARESTMV], frame_mv[NEARMV],
3367                              yv12_mb);
3368     } else {
3369       ref_frame_skip_mask[0] |= (1 << ref_frame);
3370       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3371     }
3372     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3373     frame_mv[ZEROMV][ref_frame].as_int = 0;
3374   }
3375
3376   for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3377     int mode_excluded = 0;
3378     int64_t this_rd = INT64_MAX;
3379     int disable_skip = 0;
3380     int compmode_cost = 0;
3381     int rate2 = 0, rate_y = 0, rate_uv = 0;
3382     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3383     int skippable = 0;
3384     int i;
3385     int this_skip2 = 0;
3386     int64_t total_sse = INT_MAX;
3387     int early_term = 0;
3388
3389     ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3390     second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3391
3392     // Look at the reference frame of the best mode so far and set the
3393     // skip mask to look at a subset of the remaining modes.
3394     if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3395       if (ref_index == 3) {
3396         switch (best_mbmode.ref_frame[0]) {
3397           case INTRA_FRAME:
3398             break;
3399           case LAST_FRAME:
3400             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
3401             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3402             break;
3403           case GOLDEN_FRAME:
3404             ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
3405             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3406             break;
3407           case ALTREF_FRAME:
3408             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
3409             break;
3410           case NONE:
3411           case MAX_REF_FRAMES:
3412             assert(0 && "Invalid Reference frame");
3413             break;
3414         }
3415       }
3416     }
3417
3418     if (ref_frame_skip_mask[0] & (1 << ref_frame) &&
3419         ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))
3420       continue;
3421
3422     // Test best rd so far against threshold for trying this mode.
3423     if (rd_less_than_thresh(best_rd,
3424                             rd_opt->threshes[segment_id][bsize][ref_index],
3425                             rd_opt->thresh_freq_fact[bsize][ref_index]))
3426       continue;
3427
3428     comp_pred = second_ref_frame > INTRA_FRAME;
3429     if (comp_pred) {
3430       if (!cm->allow_comp_inter_inter)
3431         continue;
3432       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3433         continue;
3434       // Do not allow compound prediction if the segment level reference frame
3435       // feature is in use as in this case there can only be one reference.
3436       if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3437         continue;
3438
3439       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3440           best_mbmode.ref_frame[0] == INTRA_FRAME)
3441         continue;
3442     }
3443
3444     // TODO(jingning, jkoleszar): scaling reference frame not supported for
3445     // sub8x8 blocks.
3446     if (ref_frame > INTRA_FRAME &&
3447         vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3448       continue;
3449
3450     if (second_ref_frame > INTRA_FRAME &&
3451         vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3452       continue;
3453
3454     if (comp_pred)
3455       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3456     else if (ref_frame != INTRA_FRAME)
3457       mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3458
3459     // If the segment reference frame feature is enabled....
3460     // then do nothing if the current ref frame is not allowed..
3461     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3462         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3463       continue;
3464     // Disable this drop out case if the ref frame
3465     // segment level feature is enabled for this segment. This is to
3466     // prevent the possibility that we end up unable to pick any mode.
3467     } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3468       // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3469       // unless ARNR filtering is enabled in which case we want
3470       // an unfiltered alternative. We allow near/nearest as well
3471       // because they may result in zero-zero MVs but be cheaper.
3472       if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3473         continue;
3474     }
3475
3476     mbmi->tx_size = TX_4X4;
3477     mbmi->uv_mode = DC_PRED;
3478     mbmi->ref_frame[0] = ref_frame;
3479     mbmi->ref_frame[1] = second_ref_frame;
3480     // Evaluate all sub-pel filters irrespective of whether we can use
3481     // them for this frame.
3482     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3483                                                           : cm->interp_filter;
3484     x->skip = 0;
3485     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3486
3487     // Select prediction reference frames.
3488     for (i = 0; i < MAX_MB_PLANE; i++) {
3489       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3490       if (comp_pred)
3491         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3492     }
3493
3494     if (ref_frame == INTRA_FRAME) {
3495       int rate;
3496       if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3497                                        &distortion_y, best_rd) >= best_rd)
3498         continue;
3499       rate2 += rate;
3500       rate2 += intra_cost_penalty;
3501       distortion2 += distortion_y;
3502
3503       if (rate_uv_intra == INT_MAX) {
3504         choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
3505                              &rate_uv_intra,
3506                              &rate_uv_tokenonly,
3507                              &dist_uv, &skip_uv,
3508                              &mode_uv);
3509       }
3510       rate2 += rate_uv_intra;
3511       rate_uv = rate_uv_tokenonly;
3512       distortion2 += dist_uv;
3513       distortion_uv = dist_uv;
3514       mbmi->uv_mode = mode_uv;
3515     } else {
3516       int rate;
3517       int64_t distortion;
3518       int64_t this_rd_thresh;
3519       int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3520       int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3521       int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3522       int tmp_best_skippable = 0;
3523       int switchable_filter_index;
3524       int_mv *second_ref = comp_pred ?
3525                              &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3526       b_mode_info tmp_best_bmodes[16];
3527       MB_MODE_INFO tmp_best_mbmode;
3528       BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3529       int pred_exists = 0;
3530       int uv_skippable;
3531
3532       this_rd_thresh = (ref_frame == LAST_FRAME) ?
3533           rd_opt->threshes[segment_id][bsize][THR_LAST] :
3534           rd_opt->threshes[segment_id][bsize][THR_ALTR];
3535       this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3536       rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
3537       rd_opt->mask_filter = 0;
3538       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3539         rd_opt->filter_cache[i] = INT64_MAX;
3540
3541       if (cm->interp_filter != BILINEAR) {
3542         tmp_best_filter = EIGHTTAP;
3543         if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
3544           tmp_best_filter = EIGHTTAP;
3545         } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
3546                    ctx->pred_interp_filter < SWITCHABLE) {
3547           tmp_best_filter = ctx->pred_interp_filter;
3548         } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
3549           tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
3550                               ctx->pred_interp_filter : 0;
3551         } else {
3552           for (switchable_filter_index = 0;
3553                switchable_filter_index < SWITCHABLE_FILTERS;
3554                ++switchable_filter_index) {
3555             int newbest, rs;
3556             int64_t rs_rd;
3557             mbmi->interp_filter = switchable_filter_index;
3558             tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
3559                                               &mbmi->ref_mvs[ref_frame][0],
3560                                               second_ref, best_yrd, &rate,
3561                                               &rate_y, &distortion,
3562                                               &skippable, &total_sse,
3563                                               (int) this_rd_thresh, seg_mvs,
3564                                               bsi, switchable_filter_index,
3565                                               mi_row, mi_col);
3566
3567             if (tmp_rd == INT64_MAX)
3568               continue;
3569             rs = vp9_get_switchable_rate(cpi);
3570             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3571             rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
3572             rd_opt->filter_cache[SWITCHABLE_FILTERS] =
3573                 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
3574                     tmp_rd + rs_rd);
3575             if (cm->interp_filter == SWITCHABLE)
3576               tmp_rd += rs_rd;
3577
3578             rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
3579
3580             newbest = (tmp_rd < tmp_best_rd);
3581             if (newbest) {
3582               tmp_best_filter = mbmi->interp_filter;
3583               tmp_best_rd = tmp_rd;
3584             }
3585             if ((newbest && cm->interp_filter == SWITCHABLE) ||
3586                 (mbmi->interp_filter == cm->interp_filter &&
3587                  cm->interp_filter != SWITCHABLE)) {
3588               tmp_best_rdu = tmp_rd;
3589               tmp_best_rate = rate;
3590               tmp_best_ratey = rate_y;
3591               tmp_best_distortion = distortion;
3592               tmp_best_sse = total_sse;
3593               tmp_best_skippable = skippable;
3594               tmp_best_mbmode = *mbmi;
3595               for (i = 0; i < 4; i++) {
3596                 tmp_best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
3597                 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
3598               }
3599               pred_exists = 1;
3600               if (switchable_filter_index == 0 &&
3601                   cpi->sf.use_rd_breakout &&
3602                   best_rd < INT64_MAX) {
3603                 if (tmp_best_rdu / 2 > best_rd) {
3604                   // skip searching the other filters if the first is
3605                   // already substantially larger than the best so far
3606                   tmp_best_filter = mbmi->interp_filter;
3607                   tmp_best_rdu = INT64_MAX;
3608                   break;
3609                 }
3610               }
3611             }
3612           }  // switchable_filter_index loop
3613         }
3614       }
3615
3616       if (tmp_best_rdu == INT64_MAX && pred_exists)
3617         continue;
3618
3619       mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
3620                              tmp_best_filter : cm->interp_filter);
3621       if (!pred_exists) {
3622         // Handles the special case when a filter that is not in the
3623         // switchable list (bilinear, 6-tap) is indicated at the frame level
3624         tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
3625                                           &mbmi->ref_mvs[ref_frame][0],
3626                                           second_ref, best_yrd, &rate, &rate_y,
3627                                           &distortion, &skippable, &total_sse,
3628                                           (int) this_rd_thresh, seg_mvs, bsi, 0,
3629                                           mi_row, mi_col);
3630         if (tmp_rd == INT64_MAX)
3631           continue;
3632       } else {
3633         total_sse = tmp_best_sse;
3634         rate = tmp_best_rate;
3635         rate_y = tmp_best_ratey;
3636         distortion = tmp_best_distortion;
3637         skippable = tmp_best_skippable;
3638         *mbmi = tmp_best_mbmode;
3639         for (i = 0; i < 4; i++)
3640           xd->mi[0].src_mi->bmi[i] = tmp_best_bmodes[i];
3641       }
3642
3643       rate2 += rate;
3644       distortion2 += distortion;
3645
3646       if (cm->interp_filter == SWITCHABLE)
3647         rate2 += vp9_get_switchable_rate(cpi);
3648
3649       if (!mode_excluded)
3650         mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
3651                                   : cm->reference_mode == COMPOUND_REFERENCE;
3652
3653       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3654
3655       tmp_best_rdu = best_rd -
3656           MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
3657               RDCOST(x->rdmult, x->rddiv, 0, total_sse));
3658
3659       if (tmp_best_rdu > 0) {
3660         // If even the 'Y' rd value of split is higher than best so far
3661         // then dont bother looking at UV
3662         vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
3663                                         BLOCK_8X8);
3664         super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
3665                          &uv_sse, BLOCK_8X8, tmp_best_rdu);
3666         if (rate_uv == INT_MAX)
3667           continue;
3668         rate2 += rate_uv;
3669         distortion2 += distortion_uv;
3670         skippable = skippable && uv_skippable;
3671         total_sse += uv_sse;
3672       }
3673     }
3674
3675     if (cm->reference_mode == REFERENCE_MODE_SELECT)
3676       rate2 += compmode_cost;
3677
3678     // Estimate the reference frame signaling cost and add it
3679     // to the rolling cost variable.
3680     if (second_ref_frame > INTRA_FRAME) {
3681       rate2 += ref_costs_comp[ref_frame];
3682     } else {
3683       rate2 += ref_costs_single[ref_frame];
3684     }
3685
3686     if (!disable_skip) {
3687       // Skip is never coded at the segment level for sub8x8 blocks and instead
3688       // always coded in the bitstream at the mode info level.
3689
3690       if (ref_frame != INTRA_FRAME && !xd->lossless) {
3691         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3692             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3693           // Add in the cost of the no skip flag.
3694           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3695         } else {
3696           // FIXME(rbultje) make this work for splitmv also
3697           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3698           distortion2 = total_sse;
3699           assert(total_sse >= 0);
3700           rate2 -= (rate_y + rate_uv);
3701           rate_y = 0;
3702           rate_uv = 0;
3703           this_skip2 = 1;
3704         }
3705       } else {
3706         // Add in the cost of the no skip flag.
3707         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3708       }
3709
3710       // Calculate the final RD estimate for this mode.
3711       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3712     }
3713
3714     if (!disable_skip && ref_frame == INTRA_FRAME) {
3715       for (i = 0; i < REFERENCE_MODES; ++i)
3716         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3717       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3718         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3719     }
3720
3721     // Did this mode help.. i.e. is it the new best mode
3722     if (this_rd < best_rd || x->skip) {
3723       if (!mode_excluded) {
3724         int max_plane = MAX_MB_PLANE;
3725         // Note index of best mode so far
3726         best_ref_index = ref_index;
3727
3728         if (ref_frame == INTRA_FRAME) {
3729           /* required for left and above block mv */
3730           mbmi->mv[0].as_int = 0;
3731           max_plane = 1;
3732         }
3733
3734         *returnrate = rate2;
3735         *returndistortion = distortion2;
3736         best_rd = this_rd;
3737         best_yrd = best_rd -
3738                    RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
3739         best_mbmode = *mbmi;
3740         best_skip2 = this_skip2;
3741         if (!x->select_tx_size)
3742           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3743         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
3744                    sizeof(uint8_t) * ctx->num_4x4_blk);
3745
3746         for (i = 0; i < 4; i++)
3747           best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
3748
3749         // TODO(debargha): enhance this test with a better distortion prediction
3750         // based on qp, activity mask and history
3751         if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3752             (ref_index > MIN_EARLY_TERM_INDEX)) {
3753           const int qstep = xd->plane[0].dequant[1];
3754           // TODO(debargha): Enhance this by specializing for each mode_index
3755           int scale = 4;
3756           if (x->source_variance < UINT_MAX) {
3757             const int var_adjust = (x->source_variance < 16);
3758             scale -= var_adjust;
3759           }
3760           if (ref_frame > INTRA_FRAME &&
3761               distortion2 * scale < qstep * qstep) {
3762             early_term = 1;
3763           }
3764         }
3765       }
3766     }
3767
3768     /* keep record of best compound/single-only prediction */
3769     if (!disable_skip && ref_frame != INTRA_FRAME) {
3770       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3771
3772       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3773         single_rate = rate2 - compmode_cost;
3774         hybrid_rate = rate2;
3775       } else {
3776         single_rate = rate2;
3777         hybrid_rate = rate2 + compmode_cost;
3778       }
3779
3780       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3781       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3782
3783       if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
3784         best_pred_rd[SINGLE_REFERENCE] = single_rd;
3785       else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
3786         best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3787
3788       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3789         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3790     }
3791
3792     /* keep record of best filter type */
3793     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3794         cm->interp_filter != BILINEAR) {
3795       int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3796                               SWITCHABLE_FILTERS : cm->interp_filter];
3797       int64_t adj_rd;
3798       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3799         if (ref == INT64_MAX)
3800           adj_rd = 0;
3801         else if (rd_opt->filter_cache[i] == INT64_MAX)
3802           // When early termination is triggered, the encoder does not have
3803           // access to the rate-distortion cost. It only knows that the cost
3804           // should be above the maximum valid value. Hence it takes the known
3805           // maximum plus an arbitrary constant as the rate-distortion cost.
3806           adj_rd = rd_opt->mask_filter - ref + 10;
3807         else
3808           adj_rd = rd_opt->filter_cache[i] - ref;
3809
3810         adj_rd += this_rd;
3811         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3812       }
3813     }
3814
3815     if (early_term)
3816       break;
3817
3818     if (x->skip && !comp_pred)
3819       break;
3820   }
3821
3822   if (best_rd >= best_rd_so_far)
3823     return INT64_MAX;
3824
3825   // If we used an estimate for the uv intra rd in the loop above...
3826   if (cpi->sf.use_uv_intra_rd_estimate) {
3827     // Do Intra UV best rd mode selection if best mode choice above was intra.
3828     if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3829       *mbmi = best_mbmode;
3830       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
3831                               &rate_uv_tokenonly,
3832                               &dist_uv,
3833                               &skip_uv,
3834                               BLOCK_8X8, TX_4X4);
3835     }
3836   }
3837
3838   if (best_rd == INT64_MAX) {
3839     *returnrate = INT_MAX;
3840     *returndistortion = INT64_MAX;
3841     return best_rd;
3842   }
3843
3844   assert((cm->interp_filter == SWITCHABLE) ||
3845          (cm->interp_filter == best_mbmode.interp_filter) ||
3846          !is_inter_block(&best_mbmode));
3847
3848   update_rd_thresh_fact(cpi, bsize, best_ref_index);
3849
3850   // macroblock modes
3851   *mbmi = best_mbmode;
3852   x->skip |= best_skip2;
3853   if (!is_inter_block(&best_mbmode)) {
3854     for (i = 0; i < 4; i++)
3855       xd->mi[0].src_mi->bmi[i].as_mode = best_bmodes[i].as_mode;
3856   } else {
3857     for (i = 0; i < 4; ++i)
3858       vpx_memcpy(&xd->mi[0].src_mi->bmi[i], &best_bmodes[i],
3859                  sizeof(b_mode_info));
3860
3861     mbmi->mv[0].as_int = xd->mi[0].src_mi->bmi[3].as_mv[0].as_int;
3862     mbmi->mv[1].as_int = xd->mi[0].src_mi->bmi[3].as_mv[1].as_int;
3863   }
3864
3865   for (i = 0; i < REFERENCE_MODES; ++i) {
3866     if (best_pred_rd[i] == INT64_MAX)
3867       best_pred_diff[i] = INT_MIN;
3868     else
3869       best_pred_diff[i] = best_rd - best_pred_rd[i];
3870   }
3871
3872   if (!x->skip) {
3873     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3874       if (best_filter_rd[i] == INT64_MAX)
3875         best_filter_diff[i] = 0;
3876       else
3877         best_filter_diff[i] = best_rd - best_filter_rd[i];
3878     }
3879     if (cm->interp_filter == SWITCHABLE)
3880       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3881   } else {
3882     vp9_zero(best_filter_diff);
3883   }
3884
3885   store_coding_context(x, ctx, best_ref_index,
3886                        best_pred_diff, best_tx_diff, best_filter_diff, 0);
3887
3888   return best_rd;
3889 }
3890