granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_rdopt.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include <stdio.h>
  13 #include <math.h>
  14 #include <limits.h>
  15 #include <assert.h>
  16
  17 #include "vp9/common/vp9_pragmas.h"
  18 #include "vp9/encoder/vp9_tokenize.h"
  19 #include "vp9/encoder/vp9_treewriter.h"
  20 #include "vp9/encoder/vp9_onyx_int.h"
  21 #include "vp9/encoder/vp9_modecosts.h"
  22 #include "vp9/encoder/vp9_encodeintra.h"
  23 #include "vp9/common/vp9_entropymode.h"
  24 #include "vp9/common/vp9_reconinter.h"
  25 #include "vp9/common/vp9_reconintra.h"
  26 #include "vp9/common/vp9_findnearmv.h"
  27 #include "vp9/common/vp9_quant_common.h"
  28 #include "vp9/encoder/vp9_encodemb.h"
  29 #include "vp9/encoder/vp9_quantize.h"
  30 #include "vp9/encoder/vp9_variance.h"
  31 #include "vp9/encoder/vp9_mcomp.h"
  32 #include "vp9/encoder/vp9_rdopt.h"
  33 #include "vp9/encoder/vp9_ratectrl.h"
  34 #include "vpx_mem/vpx_mem.h"
  35 #include "vp9/common/vp9_systemdependent.h"
  36 #include "vp9/encoder/vp9_encodemv.h"
  37 #include "vp9/common/vp9_seg_common.h"
  38 #include "vp9/common/vp9_pred_common.h"
  39 #include "vp9/common/vp9_entropy.h"
  40 #include "vp9_rtcd.h"
  41 #include "vp9/common/vp9_mvref_common.h"
  42 #include "vp9/common/vp9_common.h"
  43
/* Motion-vector bit pattern; presumably a "no valid MV" sentinel. It is not
 * referenced in the part of the file reviewed here -- TODO confirm usage. */
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

/* Table defined in another translation unit. cost_coeffs() indexes it by
 * token and stores the result in its per-position token cache. */
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

/* File-local mode values that appear as the first field of entries in
 * vp9_mode_order[]. */
#define I4X4_PRED 0x8000
#define SPLITMV 0x10000
  54
/*
 * Table of MAX_MODES candidate entries. Each entry is a triple of
 * (prediction mode, reference frame, second reference frame); NONE in the
 * third field appears on every entry above the "compound prediction modes"
 * marker.
 * NOTE(review): vp9_initialize_rd_consts() walks thresh_mult[] and
 * rd_threshes[] over the same 0..MAX_MODES-1 range, so those arrays are
 * presumably indexed in parallel with this table -- confirm before
 * reordering entries.
 */
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},

  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},

  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},

  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},

  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},

  {I4X4_PRED, INTRA_FRAME,  NONE},

  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},

  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},

  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
};
 113
 114 static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
 115                              vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
 116                              vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
 117   int i, j, k, l;
 118   TX_SIZE t;
 119   for (t = TX_4X4; t <= TX_32X32; t++)
 120     for (i = 0; i < BLOCK_TYPES; i++)
 121       for (j = 0; j < REF_TYPES; j++)
 122         for (k = 0; k < COEF_BANDS; k++)
 123           for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
 124             vp9_prob probs[ENTROPY_NODES];
 125             vp9_model_to_full_probs(p[t][i][j][k][l], probs);
 126             vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs,
 127                             vp9_coef_tree);
 128 #if CONFIG_BALANCED_COEFTREE
 129             // Replace the eob node prob with a very small value so that the
 130             // cost approximately equals the cost without the eob node
 131             probs[1] = 1;
 132             vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree);
 133 #else
 134             vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs,
 135                                  vp9_coef_tree);
 136             assert(c[t][i][j][k][l][DCT_EOB_TOKEN] ==
 137                    cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]);
 138 #endif
 139           }
 140 }
 141
/*
 * RDMULT boost factors used by vp9_initialize_rd_consts() on two-pass
 * non-key frames. The table is indexed by twopass.next_iiratio (values above
 * 31 use entry 31) and the selected factor f raises RDMULT by
 * (RDMULT * f) >> 4.
 */
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
 146
 147 // 3* dc_qlookup[Q]*dc_qlookup[Q];
 148
 149 /* values are now correlated to quantizer */
 150 static int sad_per_bit16lut[QINDEX_RANGE];
 151 static int sad_per_bit4lut[QINDEX_RANGE];
 152
 153 void vp9_init_me_luts() {
 154   int i;
 155
 156   // Initialize the sad lut tables using a formulaic calculation for now
 157   // This is to make it easier to resolve the impact of experimental changes
 158   // to the quantizer tables.
 159   for (i = 0; i < QINDEX_RANGE; i++) {
 160     sad_per_bit16lut[i] =
 161       (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
 162     sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
 163   }
 164 }
 165
 166 static int compute_rd_mult(int qindex) {
 167   const int q = vp9_dc_quant(qindex, 0);
 168   return (11 * q * q) >> 2;
 169 }
 170
 171 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
 172   cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
 173   cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
 174 }
 175
 176
 177 void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
 178   int q, i;
 179
 180   vp9_clear_system_state();  // __asm emms;
 181
 182   // Further tests required to see if optimum is different
 183   // for key frames, golden frames and arf frames.
 184   // if (cpi->common.refresh_golden_frame ||
 185   //     cpi->common.refresh_alt_ref_frame)
 186   qindex = clamp(qindex, 0, MAXQ);
 187
 188   cpi->RDMULT = compute_rd_mult(qindex);
 189   if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
 190     if (cpi->twopass.next_iiratio > 31)
 191       cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
 192     else
 193       cpi->RDMULT +=
 194           (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
 195   }
 196   cpi->mb.errorperbit = cpi->RDMULT >> 6;
 197   cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
 198
 199   vp9_set_speed_features(cpi);
 200
 201   q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
 202   q <<= 2;
 203   if (q < 8)
 204     q = 8;
 205
 206   if (cpi->RDMULT > 1000) {
 207     cpi->RDDIV = 1;
 208     cpi->RDMULT /= 100;
 209
 210     for (i = 0; i < MAX_MODES; i++) {
 211       if (cpi->sf.thresh_mult[i] < INT_MAX) {
 212         cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
 213       } else {
 214         cpi->rd_threshes[i] = INT_MAX;
 215       }
 216       cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
 217     }
 218   } else {
 219     cpi->RDDIV = 100;
 220
 221     for (i = 0; i < MAX_MODES; i++) {
 222       if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
 223         cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
 224       } else {
 225         cpi->rd_threshes[i] = INT_MAX;
 226       }
 227       cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
 228     }
 229   }
 230
 231   fill_token_costs(cpi->mb.token_costs,
 232                    cpi->mb.token_costs_noskip,
 233                    cpi->common.fc.coef_probs);
 234
 235   for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
 236     vp9_cost_tokens(cpi->mb.partition_cost[i],
 237                     cpi->common.fc.partition_prob[i],
 238                     vp9_partition_tree);
 239
 240   /*rough estimate for costing*/
 241   vp9_init_mode_costs(cpi);
 242
 243   if (cpi->common.frame_type != KEY_FRAME) {
 244     vp9_build_nmv_cost_table(
 245         cpi->mb.nmvjointcost,
 246         cpi->mb.e_mbd.allow_high_precision_mv ?
 247         cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
 248         &cpi->common.fc.nmvc,
 249         cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
 250   }
 251 }
 252
 253 int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
 254   int i, error = 0;
 255
 256   for (i = 0; i < block_size; i++) {
 257     int this_diff = coeff[i] - dqcoeff[i];
 258     error += this_diff * this_diff;
 259   }
 260
 261   return error;
 262 }
 263
/*
 * Returns the token cost of one quantized transform block and updates the
 * entropy contexts to reflect it.
 *
 * plane/block  select the quantized coefficients and eob in mb->e_mbd.
 * type         plane type; must be consistent with plane (see the asserts).
 * A, L         above/left entropy contexts for this block. On entry they
 *              supply the initial coding context; on exit the first
 *              (1 << tx_size) entries of each are set to 1 if the block has
 *              any coded coefficient (eob > 0) and to 0 otherwise.
 * tx_size      transform size; an unknown value aborts.
 * cm, y_blocks are not referenced in the body.
 *
 * The cost is the sum, over the eob coded coefficients in scan order, of the
 * token cost plus vp9_dct_value_cost_ptr[] for the value, plus the cost of
 * the EOB token when the block ends before seg_eob.
 */
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
                              int plane, int block, PLANE_TYPE type,
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              int y_blocks) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
  int c = 0;
  int cost = 0, pad;
  const int *scan, *nb;
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
  // 0 for intra blocks, 1 otherwise; selects the cost table slice.
  const int ref = mbmi->ref_frame != INTRA_FRAME;
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs[tx_size][type][ref];
  ENTROPY_CONTEXT above_ec, left_ec;
  TX_TYPE tx_type = DCT_DCT;

  const int segment_id = xd->mode_info_context->mbmi.segment_id;
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];

  int seg_eob, default_eob;
  // Per-position values of vp9_pt_energy_class[] for tokens already costed;
  // only positions written earlier in the loop below are read back here.
  uint8_t token_cache[1024];
  const uint8_t * band_translate;

  // Check for consistency of tx_size with mode info
  assert((!type && !plane) || (type && plane));
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

  // Per transform size: pick the scan order, the maximum coefficient count,
  // the band translation table, and collapse the covered above/left context
  // entries into a single 0/1 flag each.
  switch (tx_size) {
    case TX_4X4: {
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_4x4(xd, block) : DCT_DCT;
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
      seg_eob = 16;
      scan = get_scan_4x4(tx_type);
      band_translate = vp9_coefband_trans_4x4;
      break;
    }
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
      const int sz = 1 + b_width_log2(sb_type);
      const int x = block & ((1 << sz) - 1), y = block - x;
      // Note: this tx_type shadows the function-scope variable.
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
      scan = get_scan_8x8(tx_type);
      seg_eob = 64;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
      const int sz = 2 + b_width_log2(sb_type);
      const int x = block & ((1 << sz) - 1), y = block - x;
      // Note: this tx_type shadows the function-scope variable.
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
      scan = get_scan_16x16(tx_type);
      seg_eob = 256;
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    }
    case TX_32X32:
      scan = vp9_default_scan_32x32;
      seg_eob = 1024;
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    default:
      abort();
      break;
  }
  assert(eob <= seg_eob);

  pt = combine_entropy_contexts(above_ec, left_ec);
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  // Keep the unrestricted coefficient count for context derivation; seg_eob
  // may be forced to 0 just below.
  default_eob = seg_eob;

  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;

  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

  {
    for (c = 0; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
      int t = vp9_dct_value_tokens_ptr[v].token;
      int band = get_coef_band(band_translate, c);
      // The first coefficient uses the above/left context computed above;
      // later ones derive it from already-costed neighbours.
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);

      // Use the "noskip" table for the first coefficient and whenever the
      // previous position's cached energy class is non-zero.
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
      token_cache[scan[c]] = vp9_pt_energy_class[t];
    }
    // The block ended early: add the cost of the explicit EOB token.
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
    }
  }

  // is eob first coefficient;
  // (pt is reused as a plain loop counter here; c == eob at this point.)
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
  }

  return cost;
}
 393
/*
 * Chooses the transform size for the current block from per-size
 * rate/distortion measurements and reports the winner's statistics.
 *
 * r[n][0]      in:  token rate for transform size n.
 * r[n][1]      out: r[n][0] plus the cost of signalling size n using
 *                   cm->prob_tx[].
 * d[n], s[n]   in:  distortion and skip flag for size n.
 * rate, distortion, skip
 *              out: values for the selected size; rate includes the
 *                   signalling cost only when cm->txfm_mode is
 *                   TX_MODE_SELECT.
 * txfm_cache   out: RD cost that each transform mode would achieve.
 * max_txfm_size     largest size with valid entries in r/d/s. Entries for
 *                   TX_4X4 and TX_8X8 are always read, so it must be at
 *                   least TX_8X8.
 *
 * The selected size is also written to mbmi->txfm_size.
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;

  // Add the transform-size signalling cost: one bit per prob_tx[] entry up
  // to n, with the terminating "zero" omitted for the largest allowed size.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  // rd[n][0] excludes and rd[n][1] includes the size signalling cost. A
  // skipped block is charged only the skip flag.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  // Fixed transform modes take the largest size they allow (capped by
  // max_txfm_size); TX_MODE_SELECT takes a larger size only when it is
  // strictly better than every smaller one.
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  // Record the RD cost per transform mode, independent of the mode that is
  // currently active.
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}
 472
 473 static int block_error(int16_t *coeff, int16_t *dqcoeff,
 474                        int block_size, int shift) {
 475   int i;
 476   int64_t error = 0;
 477
 478   for (i = 0; i < block_size; i++) {
 479     int this_diff = coeff[i] - dqcoeff[i];
 480     error += (unsigned)this_diff * this_diff;
 481   }
 482   error >>= shift;
 483
 484   return error > INT_MAX ? INT_MAX : (int)error;
 485 }
 486
 487 static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
 488   const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
 489   return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
 490                      16 << (bwl + bhl), shift);
 491 }
 492
 493 static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
 494   const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
 495   int64_t sum = 0;
 496   int plane;
 497
 498   for (plane = 1; plane < MAX_MB_PLANE; plane++) {
 499     const int subsampling = x->e_mbd.plane[plane].subsampling_x +
 500                             x->e_mbd.plane[plane].subsampling_y;
 501     sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
 502                        16 << (bwl + bhl - subsampling), 0);
 503   }
 504   sum >>= shift;
 505   return sum > INT_MAX ? INT_MAX : (int)sum;
 506 }
 507
/*
 * Accumulator handed to rdcost_block() through the void *arg of
 * foreach_transformed_block_in_plane(); it is set up by rdcost_plane().
 */
struct rdcost_block_args {
  VP9_COMMON *cm;               // forwarded to cost_coeffs()
  MACROBLOCK *x;                // forwarded to cost_coeffs()
  ENTROPY_CONTEXT t_above[16];  // working copy of the plane's above context
  ENTROPY_CONTEXT t_left[16];   // working copy of the plane's left context
  TX_SIZE tx_size;              // transform size used for every block
  int bw;    // 1 << (b_width_log2(bsize) - subsampling_x) for the plane
  int bh;    // 1 << (b_height_log2(bsize) - subsampling_y) for the plane
  int cost;  // running sum of cost_coeffs() results
};
 518
 519 static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
 520                          int ss_txfrm_size, void *arg) {
 521   struct rdcost_block_args* args = arg;
 522   int x_idx, y_idx;
 523   MACROBLOCKD * const xd = &args->x->e_mbd;
 524
 525   txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx,
 526                            &y_idx);
 527
 528   args->cost += cost_coeffs(args->cm, args->x, plane, block,
 529                             xd->plane[plane].plane_type, args->t_above + x_idx,
 530                             args->t_left + y_idx, args->tx_size,
 531                             args->bw * args->bh);
 532 }
 533
 534 static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
 535                         BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
 536   MACROBLOCKD * const xd = &x->e_mbd;
 537   const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
 538   const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
 539   const int bw = 1 << bwl, bh = 1 << bhl;
 540   struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 };
 541
 542   vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
 543              sizeof(ENTROPY_CONTEXT) * bw);
 544   vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
 545              sizeof(ENTROPY_CONTEXT) * bh);
 546
 547   foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args);
 548
 549   return args.cost;
 550 }
 551
 552 static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
 553                      BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
 554   int cost = 0, plane;
 555
 556   for (plane = 1; plane < MAX_MB_PLANE; plane++) {
 557     cost += rdcost_plane(cm, x, plane, bsize, tx_size);
 558   }
 559   return cost;
 560 }
 561
 562 static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
 563                                      int *rate, int *distortion, int *skippable,
 564                                      BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
 565   MACROBLOCKD *const xd = &x->e_mbd;
 566   xd->mode_info_context->mbmi.txfm_size = tx_size;
 567
 568   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
 569     vp9_encode_intra_block_y(cm, x, bsize);
 570   else
 571     vp9_xform_quant_sby(cm, x, bsize);
 572
 573   *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
 574   *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
 575   *skippable  = vp9_sby_is_skippable(xd, bsize);
 576 }
 577
 578 static void super_block_yrd(VP9_COMP *cpi,
 579                             MACROBLOCK *x, int *rate, int *distortion,
 580                             int *skip, BLOCK_SIZE_TYPE bs,
 581                             int64_t txfm_cache[NB_TXFM_MODES]) {
 582   VP9_COMMON *const cm = &cpi->common;
 583   int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
 584   MACROBLOCKD *xd = &x->e_mbd;
 585   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
 586
 587   if (mbmi->ref_frame > INTRA_FRAME)
 588     vp9_subtract_sby(x, bs);
 589
 590   if (cpi->speed > 4) {
 591     if (bs >= BLOCK_SIZE_SB32X32) {
 592       mbmi->txfm_size = TX_32X32;
 593     } else if (bs >= BLOCK_SIZE_MB16X16) {
 594       mbmi->txfm_size = TX_16X16;
 595     } else if (bs >= BLOCK_SIZE_SB8X8) {
 596       mbmi->txfm_size = TX_8X8;
 597     } else {
 598       mbmi->txfm_size = TX_4X4;
 599     }
 600     vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
 601     super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
 602                              mbmi->txfm_size);
 603     return;
 604   }
 605   if (bs >= BLOCK_SIZE_SB32X32)
 606     super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
 607                              bs, TX_32X32);
 608   if (bs >= BLOCK_SIZE_MB16X16)
 609     super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
 610                              bs, TX_16X16);
 611   super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
 612                            TX_8X8);
 613   super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
 614                            TX_4X4);
 615
 616   choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
 617                            TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
 618                            - (bs < BLOCK_SIZE_MB16X16));
 619 }
 620
 621 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
 622                                      MB_PREDICTION_MODE *best_mode,
 623                                      int *bmode_costs,
 624                                      ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
 625                                      int *bestrate, int *bestratey,
 626                                      int *bestdistortion,
 627                                      BLOCK_SIZE_TYPE bsize) {
 628   MB_PREDICTION_MODE mode;
 629   MACROBLOCKD *xd = &x->e_mbd;
 630   int64_t best_rd = INT64_MAX;
 631   int rate = 0;
 632   int distortion;
 633   VP9_COMMON *const cm = &cpi->common;
 634   const int src_stride = x->plane[0].src.stride;
 635   uint8_t *src, *dst;
 636   int16_t *src_diff, *coeff;
 637
 638   ENTROPY_CONTEXT ta[2], tempa[2];
 639   ENTROPY_CONTEXT tl[2], templ[2];
 640   TX_TYPE tx_type = DCT_DCT;
 641   TX_TYPE best_tx_type = DCT_DCT;
 642   int bw = 1 << b_width_log2(bsize);
 643   int bh = 1 << b_height_log2(bsize);
 644   int idx, idy, block;
 645   DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
 646
 647   assert(ib < 4);
 648
 649   vpx_memcpy(ta, a, sizeof(ta));
 650   vpx_memcpy(tl, l, sizeof(tl));
 651   xd->mode_info_context->mbmi.txfm_size = TX_4X4;
 652
 653   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
 654     int64_t this_rd;
 655     int ratey = 0;
 656
 657     rate = bmode_costs[mode];
 658     distortion = 0;
 659
 660     vpx_memcpy(tempa, ta, sizeof(ta));
 661     vpx_memcpy(templ, tl, sizeof(tl));
 662
 663     for (idy = 0; idy < bh; ++idy) {
 664       for (idx = 0; idx < bw; ++idx) {
 665         block = ib + idy * 2 + idx;
 666         xd->mode_info_context->bmi[block].as_mode.first = mode;
 667         src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
 668                                         x->plane[0].src.buf, src_stride);
 669         src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
 670                                              x->plane[0].src_diff);
 671         coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
 672         dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
 673                                         xd->plane[0].dst.buf,
 674                                         xd->plane[0].dst.stride);
 675         vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
 676                              dst, xd->plane[0].dst.stride);
 677         vp9_subtract_block(4, 4, src_diff, 8,
 678                            src, src_stride,
 679                            dst, xd->plane[0].dst.stride);
 680
 681         tx_type = get_tx_type_4x4(xd, block);
 682         if (tx_type != DCT_DCT) {
 683           vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
 684           x->quantize_b_4x4(x, block, tx_type, 16);
 685         } else {
 686           x->fwd_txm4x4(src_diff, coeff, 16);
 687           x->quantize_b_4x4(x, block, tx_type, 16);
 688         }
 689
 690         ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
 691                              tempa + idx, templ + idy, TX_4X4, 16);
 692         distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
 693                                                          block, 16), 16) >> 2;
 694
 695         if (best_tx_type != DCT_DCT)
 696           vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
 697                                dst, xd->plane[0].dst.stride, best_tx_type);
 698         else
 699           xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
 700                              dst, xd->plane[0].dst.stride);
 701       }
 702     }
 703
 704     rate += ratey;
 705     this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 706
 707     if (this_rd < best_rd) {
 708       *bestrate = rate;
 709       *bestratey = ratey;
 710       *bestdistortion = distortion;
 711       best_rd = this_rd;
 712       *best_mode = mode;
 713       best_tx_type = tx_type;
 714       vpx_memcpy(a, tempa, sizeof(tempa));
 715       vpx_memcpy(l, templ, sizeof(templ));
 716       for (idy = 0; idy < bh; ++idy) {
 717         for (idx = 0; idx < bw; ++idx) {
 718           block = ib + idy * 2 + idx;
 719           vpx_memcpy(best_dqcoeff[idy * 2 + idx],
 720                      BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
 721                      sizeof(best_dqcoeff[0]));
 722         }
 723       }
 724     }
 725   }
 726
 727   for (idy = 0; idy < bh; ++idy) {
 728     for (idx = 0; idx < bw; ++idx) {
 729       block = ib + idy * 2 + idx;
 730       xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
 731       dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
 732                                       xd->plane[0].dst.buf,
 733                                       xd->plane[0].dst.stride);
 734
 735       vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
 736                            dst, xd->plane[0].dst.stride);
 737       // inverse transform
 738       if (best_tx_type != DCT_DCT)
 739         vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
 740                              xd->plane[0].dst.stride, best_tx_type);
 741       else
 742         xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
 743                            xd->plane[0].dst.stride);
 744     }
 745   }
 746
 747   return best_rd;
 748 }
 749
 750 static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
 751                                          int *Rate, int *rate_y,
 752                                          int *Distortion, int64_t best_rd) {
 753   int i, j;
 754   MACROBLOCKD *const xd = &mb->e_mbd;
 755   BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
 756   int bw = 1 << b_width_log2(bsize);
 757   int bh = 1 << b_height_log2(bsize);
 758   int idx, idy;
 759   int cost = 0;
 760   int distortion = 0;
 761   int tot_rate_y = 0;
 762   int64_t total_rd = 0;
 763   ENTROPY_CONTEXT t_above[4], t_left[4];
 764   int *bmode_costs;
 765   MODE_INFO *const mic = xd->mode_info_context;
 766
 767   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
 768   vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
 769
 770   bmode_costs = mb->mbmode_cost;
 771
 772   for (idy = 0; idy < 2; idy += bh) {
 773     for (idx = 0; idx < 2; idx += bw) {
 774       const int mis = xd->mode_info_stride;
 775       MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
 776       int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
 777       int UNINITIALIZED_IS_SAFE(d);
 778       i = idy * 2 + idx;
 779
 780       if (xd->frame_type == KEY_FRAME) {
 781         const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
 782         const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
 783                                      left_block_mode(mic, i) : DC_PRED;
 784
 785         bmode_costs  = mb->y_mode_costs[A][L];
 786       }
 787
 788       total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
 789                                         t_above + idx, t_left + idy,
 790                                         &r, &ry, &d, bsize);
 791       cost += r;
 792       distortion += d;
 793       tot_rate_y += ry;
 794
 795       mic->bmi[i].as_mode.first = best_mode;
 796       for (j = 1; j < bh; ++j)
 797         mic->bmi[i + j * 2].as_mode.first = best_mode;
 798       for (j = 1; j < bw; ++j)
 799         mic->bmi[i + j].as_mode.first = best_mode;
 800
 801       if (total_rd >= best_rd)
 802         break;
 803     }
 804   }
 805
 806   if (total_rd >= best_rd)
 807     return INT64_MAX;
 808
 809   *Rate = cost;
 810   *rate_y = tot_rate_y;
 811   *Distortion = distortion;
 812   xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;
 813
 814   return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
 815 }
 816
 817 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
 818                                       int *rate, int *rate_tokenonly,
 819                                       int *distortion, int *skippable,
 820                                       BLOCK_SIZE_TYPE bsize,
 821                                       int64_t txfm_cache[NB_TXFM_MODES]) {
 822   MB_PREDICTION_MODE mode;
 823   MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
 824   MACROBLOCKD *const xd = &x->e_mbd;
 825   int this_rate, this_rate_tokenonly;
 826   int this_distortion, s;
 827   int64_t best_rd = INT64_MAX, this_rd;
 828   TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
 829   int i;
 830   int *bmode_costs = x->mbmode_cost;
 831
 832   if (bsize < BLOCK_SIZE_SB8X8) {
 833     x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
 834     return best_rd;
 835   }
 836
 837   for (i = 0; i < NB_TXFM_MODES; i++)
 838     txfm_cache[i] = INT64_MAX;
 839
 840   /* Y Search for 32x32 intra prediction mode */
 841   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
 842     int64_t local_txfm_cache[NB_TXFM_MODES];
 843     MODE_INFO *const mic = xd->mode_info_context;
 844     const int mis = xd->mode_info_stride;
 845
 846     if (cpi->common.frame_type == KEY_FRAME) {
 847       const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
 848       const MB_PREDICTION_MODE L = xd->left_available ?
 849                                    left_block_mode(mic, 0) : DC_PRED;
 850
 851       bmode_costs = x->y_mode_costs[A][L];
 852     }
 853     x->e_mbd.mode_info_context->mbmi.mode = mode;
 854
 855     super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
 856                     bsize, local_txfm_cache);
 857
 858     this_rate = this_rate_tokenonly + bmode_costs[mode];
 859     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 860
 861     if (this_rd < best_rd) {
 862       mode_selected   = mode;
 863       best_rd         = this_rd;
 864       best_tx         = x->e_mbd.mode_info_context->mbmi.txfm_size;
 865       *rate           = this_rate;
 866       *rate_tokenonly = this_rate_tokenonly;
 867       *distortion     = this_distortion;
 868       *skippable      = s;
 869     }
 870
 871     for (i = 0; i < NB_TXFM_MODES; i++) {
 872       int64_t adj_rd = this_rd + local_txfm_cache[i] -
 873                        local_txfm_cache[cpi->common.txfm_mode];
 874       if (adj_rd < txfm_cache[i]) {
 875         txfm_cache[i] = adj_rd;
 876       }
 877     }
 878   }
 879
 880   x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
 881   x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;
 882
 883   return best_rd;
 884 }
 885
 886 static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
 887                                       int *rate, int *distortion,
 888                                       int *skippable, BLOCK_SIZE_TYPE bsize,
 889                                       TX_SIZE uv_tx_size) {
 890   MACROBLOCKD *const xd = &x->e_mbd;
 891   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
 892     vp9_encode_intra_block_uv(cm, x, bsize);
 893   else
 894     vp9_xform_quant_sbuv(cm, x, bsize);
 895
 896   *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2);
 897   *rate       = rdcost_uv(cm, x, bsize, uv_tx_size);
 898   *skippable  = vp9_sbuv_is_skippable(xd, bsize);
 899 }
 900
 901 static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
 902                              int *rate, int *distortion, int *skippable,
 903                              BLOCK_SIZE_TYPE bsize) {
 904   MACROBLOCKD *const xd = &x->e_mbd;
 905   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
 906
 907   if (mbmi->ref_frame > INTRA_FRAME)
 908     vp9_subtract_sbuv(x, bsize);
 909
 910   if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) {
 911     super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
 912                               TX_32X32);
 913   } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) {
 914     super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
 915                               TX_16X16);
 916   } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) {
 917     super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
 918                               TX_8X8);
 919   } else {
 920     super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
 921                               TX_4X4);
 922   }
 923 }
 924
 925 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
 926                                        int *rate, int *rate_tokenonly,
 927                                        int *distortion, int *skippable,
 928                                        BLOCK_SIZE_TYPE bsize) {
 929   MB_PREDICTION_MODE mode;
 930   MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
 931   int64_t best_rd = INT64_MAX, this_rd;
 932   int this_rate_tokenonly, this_rate;
 933   int this_distortion, s;
 934
 935   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
 936     x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
 937     super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
 938                      &this_distortion, &s, bsize);
 939     this_rate = this_rate_tokenonly +
 940                 x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
 941     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 942
 943     if (this_rd < best_rd) {
 944       mode_selected   = mode;
 945       best_rd         = this_rd;
 946       *rate           = this_rate;
 947       *rate_tokenonly = this_rate_tokenonly;
 948       *distortion     = this_distortion;
 949       *skippable      = s;
 950     }
 951   }
 952
 953   x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
 954
 955   return best_rd;
 956 }
 957
 958 int vp9_cost_mv_ref(VP9_COMP *cpi,
 959                     MB_PREDICTION_MODE m,
 960                     const int mode_context) {
 961   MACROBLOCKD *xd = &cpi->mb.e_mbd;
 962   int segment_id = xd->mode_info_context->mbmi.segment_id;
 963
 964   // Dont account for mode here if segment skip is enabled.
 965   if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
 966     VP9_COMMON *pc = &cpi->common;
 967
 968     vp9_prob p[VP9_INTER_MODES - 1];
 969     assert(NEARESTMV <= m  &&  m <= NEWMV);
 970     vp9_mv_ref_probs(pc, p, mode_context);
 971     return cost_token(vp9_sb_mv_ref_tree, p,
 972                       vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
 973   } else
 974     return 0;
 975 }
 976
 977 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
 978   x->e_mbd.mode_info_context->mbmi.mode = mb;
 979   x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
 980 }
 981
 982 static int labels2mode(MACROBLOCK *x, int i,
 983                        MB_PREDICTION_MODE this_mode,
 984                        int_mv *this_mv, int_mv *this_second_mv,
 985                        int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
 986                        int_mv seg_mvs[MAX_REF_FRAMES],
 987                        int_mv *best_ref_mv,
 988                        int_mv *second_best_ref_mv,
 989                        int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
 990   MACROBLOCKD *const xd = &x->e_mbd;
 991   MODE_INFO *const mic = xd->mode_info_context;
 992   MB_MODE_INFO * mbmi = &mic->mbmi;
 993   int cost = 0, thismvcost = 0;
 994   int idx, idy;
 995   int bw = 1 << b_width_log2(mbmi->sb_type);
 996   int bh = 1 << b_height_log2(mbmi->sb_type);
 997
 998   /* We have to be careful retrieving previously-encoded motion vectors.
 999    Ones from this macroblock have to be pulled from the BLOCKD array
1000    as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1001   MB_PREDICTION_MODE m;
1002
1003   // the only time we should do costing for new motion vector or mode
1004   // is when we are on a new label  (jbb May 08, 2007)
1005   switch (m = this_mode) {
1006     case NEWMV:
1007       this_mv->as_int = seg_mvs[mbmi->ref_frame].as_int;
1008       thismvcost  = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
1009                                     102, xd->allow_high_precision_mv);
1010       if (mbmi->second_ref_frame > 0) {
1011         this_second_mv->as_int = seg_mvs[mbmi->second_ref_frame].as_int;
1012         thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
1013                                       mvjcost, mvcost, 102,
1014                                       xd->allow_high_precision_mv);
1015       }
1016       break;
1017     case NEARESTMV:
1018       this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame].as_int;
1019       if (mbmi->second_ref_frame > 0)
1020         this_second_mv->as_int =
1021             frame_mv[NEARESTMV][mbmi->second_ref_frame].as_int;
1022       break;
1023     case NEARMV:
1024       this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame].as_int;
1025       if (mbmi->second_ref_frame > 0)
1026         this_second_mv->as_int =
1027             frame_mv[NEARMV][mbmi->second_ref_frame].as_int;
1028       break;
1029     case ZEROMV:
1030       this_mv->as_int = 0;
1031       if (mbmi->second_ref_frame > 0)
1032         this_second_mv->as_int = 0;
1033       break;
1034     default:
1035       break;
1036   }
1037
1038   cost = vp9_cost_mv_ref(cpi, this_mode,
1039                          mbmi->mb_mode_context[mbmi->ref_frame]);
1040
1041   mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1042   if (mbmi->second_ref_frame > 0)
1043     mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1044
1045   x->partition_info->bmi[i].mode = m;
1046   x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
1047   if (mbmi->second_ref_frame > 0)
1048     x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
1049   for (idy = 0; idy < bh; ++idy) {
1050     for (idx = 0; idx < bw; ++idx) {
1051       vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1052                  &mic->bmi[i], sizeof(mic->bmi[i]));
1053       vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
1054                  &x->partition_info->bmi[i],
1055                  sizeof(x->partition_info->bmi[i]));
1056     }
1057   }
1058
1059   cost += thismvcost;
1060   return cost;
1061 }
1062
1063 static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
1064                                        MACROBLOCK *x,
1065                                        int i,
1066                                        int *labelyrate,
1067                                        int *distortion,
1068                                        ENTROPY_CONTEXT *ta,
1069                                        ENTROPY_CONTEXT *tl) {
1070   int k;
1071   MACROBLOCKD *xd = &x->e_mbd;
1072   BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
1073   int bwl = b_width_log2(bsize), bw = 1 << bwl;
1074   int bhl = b_height_log2(bsize), bh = 1 << bhl;
1075   int idx, idy;
1076   const int src_stride = x->plane[0].src.stride;
1077   uint8_t* const src =
1078   raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1079                             x->plane[0].src.buf, src_stride);
1080   int16_t* src_diff =
1081   raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
1082                             x->plane[0].src_diff);
1083   int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
1084   uint8_t* const pre =
1085   raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1086                             xd->plane[0].pre[0].buf,
1087                             xd->plane[0].pre[0].stride);
1088   uint8_t* const dst =
1089   raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1090                             xd->plane[0].dst.buf,
1091                             xd->plane[0].dst.stride);
1092   int thisdistortion = 0;
1093   int thisrate = 0;
1094
1095   *labelyrate = 0;
1096   *distortion = 0;
1097
1098   vp9_build_inter_predictor(pre,
1099                             xd->plane[0].pre[0].stride,
1100                             dst,
1101                             xd->plane[0].dst.stride,
1102                             &xd->mode_info_context->bmi[i].as_mv[0],
1103                             &xd->scale_factor[0],
1104                             4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix);
1105
1106   // TODO(debargha): Make this work properly with the
1107   // implicit-compoundinter-weight experiment when implicit
1108   // weighting for splitmv modes is turned on.
1109   if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
1110     uint8_t* const second_pre =
1111     raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1112                               xd->plane[0].pre[1].buf,
1113                               xd->plane[0].pre[1].stride);
1114     vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
1115                               dst, xd->plane[0].dst.stride,
1116                               &xd->mode_info_context->bmi[i].as_mv[1],
1117                               &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
1118                               &xd->subpix);
1119   }
1120
1121   vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
1122                      src, src_stride,
1123                      dst, xd->plane[0].dst.stride);
1124
1125   k = i;
1126   for (idy = 0; idy < bh; ++idy) {
1127     for (idx = 0; idx < bw; ++idx) {
1128       k += (idy * 2 + idx);
1129       src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
1130                                            x->plane[0].src_diff);
1131       coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
1132       x->fwd_txm4x4(src_diff, coeff, 16);
1133       x->quantize_b_4x4(x, k, DCT_DCT, 16);
1134       thisdistortion += vp9_block_error(coeff,
1135                                         BLOCK_OFFSET(xd->plane[0].dqcoeff,
1136                                                      k, 16), 16);
1137       thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
1138                               ta + (k & 1),
1139                               tl + (k >> 1), TX_4X4, 16);
1140     }
1141   }
1142   *distortion += thisdistortion;
1143   *labelyrate += thisrate;
1144
1145   *distortion >>= 2;
1146   return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1147 }
1148
1149 typedef struct {
1150   int_mv *ref_mv, *second_ref_mv;
1151   int_mv mvp;
1152
1153   int64_t segment_rd;
1154   int r;
1155   int d;
1156   int segment_yrate;
1157   MB_PREDICTION_MODE modes[4];
1158   int_mv mvs[4], second_mvs[4];
1159   int eobs[4];
1160   int mvthresh;
1161 } BEST_SEG_INFO;
1162
1163 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1164   int r = 0;
1165   r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1166   r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1167   r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1168   r |= (mv->as_mv.col >> 3) > x->mv_col_max;
1169   return r;
1170 }
1171
1172 static enum BlockSize get_block_size(int bw, int bh) {
1173   if (bw == 4 && bh == 4)
1174     return BLOCK_4X4;
1175
1176   if (bw == 4 && bh == 8)
1177     return BLOCK_4X8;
1178
1179   if (bw == 8 && bh == 4)
1180     return BLOCK_8X4;
1181
1182   if (bw == 8 && bh == 8)
1183     return BLOCK_8X8;
1184
1185   if (bw == 8 && bh == 16)
1186     return BLOCK_8X16;
1187
1188   if (bw == 16 && bh == 8)
1189     return BLOCK_16X8;
1190
1191   if (bw == 16 && bh == 16)
1192     return BLOCK_16X16;
1193
1194   if (bw == 32 && bh == 32)
1195     return BLOCK_32X32;
1196
1197   if (bw == 32 && bh == 16)
1198     return BLOCK_32X16;
1199
1200   if (bw == 16 && bh == 32)
1201     return BLOCK_16X32;
1202
1203   if (bw == 64 && bh == 32)
1204     return BLOCK_64X32;
1205
1206   if (bw == 32 && bh == 64)
1207     return BLOCK_32X64;
1208
1209   if (bw == 64 && bh == 64)
1210     return BLOCK_64X64;
1211
1212   assert(0);
1213   return -1;
1214 }
1215
1216 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1217   MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1218   x->plane[0].src.buf =
1219       raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1220                                 x->plane[0].src.buf,
1221                                 x->plane[0].src.stride);
1222   assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
1223   x->e_mbd.plane[0].pre[0].buf =
1224       raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1225                                 x->e_mbd.plane[0].pre[0].buf,
1226                                 x->e_mbd.plane[0].pre[0].stride);
1227   if (mbmi->second_ref_frame)
1228     x->e_mbd.plane[0].pre[1].buf =
1229         raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1230                                   x->e_mbd.plane[0].pre[1].buf,
1231                                   x->e_mbd.plane[0].pre[1].stride);
1232 }
1233
1234 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1235                                   struct buf_2d orig_pre[2]) {
1236   MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1237   x->plane[0].src = orig_src;
1238   x->e_mbd.plane[0].pre[0] = orig_pre[0];
1239   if (mbmi->second_ref_frame)
1240     x->e_mbd.plane[0].pre[1] = orig_pre[1];
1241 }
1242
/* Forward declaration; the definition is not in this part of the file.
 * rd_check_segment_txsize() calls it for NEWMV with two reference frames and
 * afterwards reads the vectors for both reference frames back out of
 * frame_mv[].  NOTE(review): the exact refinement performed is defined with
 * the function body -- confirm there. */
static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                    BLOCK_SIZE_TYPE bsize,
                                    int_mv *frame_mv,
                                    YV12_BUFFER_CONFIG **scaled_ref_frame,
                                    int mi_row, int mi_col,
                                    int_mv single_newmv[MAX_REF_FRAMES]);
1249
1250 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1251                                     BEST_SEG_INFO *bsi,
1252                                     int_mv seg_mvs[4][MAX_REF_FRAMES],
1253                                     int mi_row, int mi_col) {
1254   int i, j;
1255   int br = 0, bd = 0;
1256   MB_PREDICTION_MODE this_mode;
1257   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
1258   const int label_count = 4;
1259   int64_t this_segment_rd = 0, other_segment_rd;
1260   int label_mv_thresh;
1261   int rate = 0;
1262   int sbr = 0, sbd = 0;
1263   int segmentyrate = 0;
1264   int best_eobs[4] = { 0 };
1265   BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
1266   int bwl = b_width_log2(bsize), bw = 1 << bwl;
1267   int bhl = b_height_log2(bsize), bh = 1 << bhl;
1268   int idx, idy;
1269   vp9_variance_fn_ptr_t *v_fn_ptr;
1270   YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
1271   ENTROPY_CONTEXT t_above[4], t_left[4];
1272   ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
1273
1274   vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
1275   vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
1276
1277   v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
1278
1279   // 64 makes this threshold really big effectively
1280   // making it so that we very rarely check mvs on
1281   // segments.   setting this to 1 would make mv thresh
1282   // roughly equal to what it is for macroblocks
1283   label_mv_thresh = 1 * bsi->mvthresh / label_count;
1284
1285   // Segmentation method overheads
1286   other_segment_rd = this_segment_rd;
1287
1288   for (idy = 0; idy < 2; idy += bh) {
1289     for (idx = 0; idx < 2; idx += bw) {
1290       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1291       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1292       int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1293       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1294       int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
1295       MB_PREDICTION_MODE mode_selected = ZEROMV;
1296       int bestlabelyrate = 0;
1297       i = idy * 2 + idx;
1298
1299       frame_mv[ZEROMV][mbmi->ref_frame].as_int = 0;
1300       frame_mv[ZEROMV][mbmi->second_ref_frame].as_int = 0;
1301       vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1302                                     &frame_mv[NEARESTMV][mbmi->ref_frame],
1303                                     &frame_mv[NEARMV][mbmi->ref_frame],
1304                                     i, 0);
1305       if (mbmi->second_ref_frame > 0)
1306         vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1307                                    &frame_mv[NEARESTMV][mbmi->second_ref_frame],
1308                                    &frame_mv[NEARMV][mbmi->second_ref_frame],
1309                                    i, 1);
1310
1311       // search for the best motion vector on this segment
1312       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1313         int64_t this_rd;
1314         int distortion;
1315         int labelyrate;
1316         ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
1317         const struct buf_2d orig_src = x->plane[0].src;
1318         struct buf_2d orig_pre[2];
1319
1320         vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
1321
1322         vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
1323         vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
1324
1325         // motion search for newmv (single predictor case only)
1326         if (mbmi->second_ref_frame <= 0 && this_mode == NEWMV) {
1327           int step_param = 0;
1328           int further_steps;
1329           int thissme, bestsme = INT_MAX;
1330           int sadpb = x->sadperbit4;
1331           int_mv mvp_full;
1332
1333           /* Is the best so far sufficiently good that we cant justify doing
1334            * and new motion search. */
1335           if (best_label_rd < label_mv_thresh)
1336             break;
1337
1338           if (cpi->compressor_speed) {
1339             // use previous block's result as next block's MV predictor.
1340             if (i > 0) {
1341               bsi->mvp.as_int =
1342               x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
1343               if (i == 2)
1344                 bsi->mvp.as_int =
1345                 x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
1346               step_param = 2;
1347             }
1348           }
1349
1350           further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1351
1352           mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1353           mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1354
1355           // adjust src pointer for this block
1356           mi_buf_shift(x, i);
1357           bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1358                                            sadpb, further_steps, 0, v_fn_ptr,
1359                                            bsi->ref_mv, &mode_mv[NEWMV]);
1360
1361           // Should we do a full search (best quality only)
1362           if (cpi->compressor_speed == 0) {
1363             /* Check if mvp_full is within the range. */
1364             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1365                      x->mv_row_min, x->mv_row_max);
1366
1367             thissme = cpi->full_search_sad(x, &mvp_full,
1368                                            sadpb, 16, v_fn_ptr,
1369                                            x->nmvjointcost, x->mvcost,
1370                                            bsi->ref_mv, i);
1371
1372             if (thissme < bestsme) {
1373               bestsme = thissme;
1374               mode_mv[NEWMV].as_int =
1375                   x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
1376             } else {
1377               /* The full search result is actually worse so re-instate the
1378                * previous best vector */
1379               x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
1380                   mode_mv[NEWMV].as_int;
1381             }
1382           }
1383
1384           if (bestsme < INT_MAX) {
1385             int distortion;
1386             unsigned int sse;
1387             cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
1388                                          bsi->ref_mv, x->errorperbit, v_fn_ptr,
1389                                          x->nmvjointcost, x->mvcost,
1390                                          &distortion, &sse);
1391
1392             // safe motion search result for use in compound prediction
1393             seg_mvs[i][mbmi->ref_frame].as_int = mode_mv[NEWMV].as_int;
1394           }
1395
1396           // restore src pointers
1397           mi_buf_restore(x, orig_src, orig_pre);
1398         } else if (mbmi->second_ref_frame > 0 && this_mode == NEWMV) {
1399           if (seg_mvs[i][mbmi->second_ref_frame].as_int == INVALID_MV ||
1400               seg_mvs[i][mbmi->ref_frame       ].as_int == INVALID_MV)
1401             continue;
1402
1403           // adjust src pointers
1404           mi_buf_shift(x, i);
1405           if (cpi->sf.comp_inter_joint_search_thresh < bsize) {
1406             iterative_motion_search(cpi, x, bsize, frame_mv[this_mode],
1407                                     scaled_ref_frame,
1408                                     mi_row, mi_col, seg_mvs[i]);
1409             seg_mvs[i][mbmi->ref_frame].as_int =
1410                 frame_mv[this_mode][mbmi->ref_frame].as_int;
1411             seg_mvs[i][mbmi->second_ref_frame].as_int =
1412                 frame_mv[this_mode][mbmi->second_ref_frame].as_int;
1413           }
1414           // restore src pointers
1415           mi_buf_restore(x, orig_src, orig_pre);
1416         }
1417
1418         rate = labels2mode(x, i, this_mode, &mode_mv[this_mode],
1419                            &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1420                            bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1421                            x->mvcost, cpi);
1422
1423         // Trap vectors that reach beyond the UMV borders
1424         if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
1425             ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
1426             ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
1427             ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
1428           continue;
1429         }
1430         if (mbmi->second_ref_frame > 0 &&
1431             mv_check_bounds(x, &second_mode_mv[this_mode]))
1432           continue;
1433
1434         this_rd = encode_inter_mb_segment(&cpi->common,
1435                                           x, i, &labelyrate,
1436                                           &distortion, t_above_s, t_left_s);
1437         this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
1438         rate += labelyrate;
1439
1440         if (this_rd < best_label_rd) {
1441           sbr = rate;
1442           sbd = distortion;
1443           bestlabelyrate = labelyrate;
1444           mode_selected = this_mode;
1445           best_label_rd = this_rd;
1446           best_eobs[i] = x->e_mbd.plane[0].eobs[i];
1447           vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
1448           vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
1449         }
1450       } /*for each 4x4 mode*/
1451
1452       vpx_memcpy(t_above, t_above_b, sizeof(t_above));
1453       vpx_memcpy(t_left, t_left_b, sizeof(t_left));
1454
1455       labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
1456                   &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
1457                   bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1458                   x->mvcost, cpi);
1459
1460       br += sbr;
1461       bd += sbd;
1462       segmentyrate += bestlabelyrate;
1463       this_segment_rd += best_label_rd;
1464       other_segment_rd += best_other_rd;
1465
1466       for (j = 1; j < bh; ++j)
1467         vpx_memcpy(&x->partition_info->bmi[i + j * 2],
1468                    &x->partition_info->bmi[i],
1469                    sizeof(x->partition_info->bmi[i]));
1470       for (j = 1; j < bw; ++j)
1471         vpx_memcpy(&x->partition_info->bmi[i + j],
1472                    &x->partition_info->bmi[i],
1473                    sizeof(x->partition_info->bmi[i]));
1474     }
1475   } /* for each label */
1476
1477   if (this_segment_rd < bsi->segment_rd) {
1478     bsi->r = br;
1479     bsi->d = bd;
1480     bsi->segment_yrate = segmentyrate;
1481     bsi->segment_rd = this_segment_rd;
1482
1483     // store everything needed to come back to this!!
1484     for (i = 0; i < 4; i++) {
1485       bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
1486       if (mbmi->second_ref_frame > 0)
1487         bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
1488       bsi->modes[i] = x->partition_info->bmi[i].mode;
1489       bsi->eobs[i] = best_eobs[i];
1490     }
1491   }
1492 }
1493
1494 static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
1495                                        int_mv *best_ref_mv,
1496                                        int_mv *second_best_ref_mv,
1497                                        int64_t best_rd,
1498                                        int *returntotrate,
1499                                        int *returnyrate,
1500                                        int *returndistortion,
1501                                        int *skippable, int mvthresh,
1502                                        int_mv seg_mvs[4][MAX_REF_FRAMES],
1503                                        int mi_row, int mi_col) {
1504   int i;
1505   BEST_SEG_INFO bsi;
1506   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
1507
1508   vpx_memset(&bsi, 0, sizeof(bsi));
1509
1510   bsi.segment_rd = best_rd;
1511   bsi.ref_mv = best_ref_mv;
1512   bsi.second_ref_mv = second_best_ref_mv;
1513   bsi.mvp.as_int = best_ref_mv->as_int;
1514   bsi.mvthresh = mvthresh;
1515
1516   for (i = 0; i < 4; i++)
1517     bsi.modes[i] = ZEROMV;
1518
1519   rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col);
1520
1521   /* set it to the best */
1522   for (i = 0; i < 4; i++) {
1523     x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int;
1524     if (mbmi->second_ref_frame > 0)
1525       x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int =
1526       bsi.second_mvs[i].as_int;
1527     x->e_mbd.plane[0].eobs[i] = bsi.eobs[i];
1528   }
1529
1530   /* save partitions */
1531   x->partition_info->count = 4;
1532
1533   for (i = 0; i < x->partition_info->count; i++) {
1534     x->partition_info->bmi[i].mode = bsi.modes[i];
1535     x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv;
1536     if (mbmi->second_ref_frame > 0)
1537       x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv;
1538   }
1539   /*
1540    * used to set mbmi->mv.as_int
1541    */
1542   x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int;
1543   if (mbmi->second_ref_frame > 0)
1544     x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int;
1545
1546   *returntotrate = bsi.r;
1547   *returndistortion = bsi.d;
1548   *returnyrate = bsi.segment_yrate;
1549   *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
1550   mbmi->mode = bsi.modes[3];
1551
1552   return (int)(bsi.segment_rd);
1553 }
1554
1555 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
1556                     uint8_t *ref_y_buffer, int ref_y_stride,
1557                     int ref_frame, enum BlockSize block_size ) {
1558   MACROBLOCKD *xd = &x->e_mbd;
1559   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
1560   int_mv this_mv;
1561   int i;
1562   int zero_seen = 0;
1563   int best_index = 0;
1564   int best_sad = INT_MAX;
1565   int this_sad = INT_MAX;
1566
1567   uint8_t *src_y_ptr = x->plane[0].src.buf;
1568   uint8_t *ref_y_ptr;
1569   int row_offset, col_offset;
1570
1571   // Get the sad for each candidate reference mv
1572   for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
1573     this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
1574
1575     // The list is at an end if we see 0 for a second time.
1576     if (!this_mv.as_int && zero_seen)
1577       break;
1578     zero_seen = zero_seen || !this_mv.as_int;
1579
1580     row_offset = this_mv.as_mv.row >> 3;
1581     col_offset = this_mv.as_mv.col >> 3;
1582     ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
1583
1584     // Find sad for current vector.
1585     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
1586                                            ref_y_ptr, ref_y_stride,
1587                                            0x7fffffff);
1588
1589     // Note if it is the best so far.
1590     if (this_sad < best_sad) {
1591       best_sad = this_sad;
1592       best_index = i;
1593     }
1594   }
1595
1596   // Note the index of the mv that worked best in the reference list.
1597   x->mv_best_ref_index[ref_frame] = best_index;
1598 }
1599
1600 extern void vp9_calc_ref_probs(int *count, vp9_prob *probs);
1601 static void estimate_curframe_refprobs(VP9_COMP *cpi,
1602                                        vp9_prob mod_refprobs[3],
1603                                        int pred_ref) {
1604   int norm_cnt[MAX_REF_FRAMES];
1605   const int *const rfct = cpi->count_mb_ref_frame_usage;
1606   int intra_count = rfct[INTRA_FRAME];
1607   int last_count  = rfct[LAST_FRAME];
1608   int gf_count    = rfct[GOLDEN_FRAME];
1609   int arf_count   = rfct[ALTREF_FRAME];
1610
1611   // Work out modified reference frame probabilities to use where prediction
1612   // of the reference frame fails
1613   if (pred_ref == INTRA_FRAME) {
1614     norm_cnt[0] = 0;
1615     norm_cnt[1] = last_count;
1616     norm_cnt[2] = gf_count;
1617     norm_cnt[3] = arf_count;
1618     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1619     mod_refprobs[0] = 0;    // This branch implicit
1620   } else if (pred_ref == LAST_FRAME) {
1621     norm_cnt[0] = intra_count;
1622     norm_cnt[1] = 0;
1623     norm_cnt[2] = gf_count;
1624     norm_cnt[3] = arf_count;
1625     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1626     mod_refprobs[1] = 0;    // This branch implicit
1627   } else if (pred_ref == GOLDEN_FRAME) {
1628     norm_cnt[0] = intra_count;
1629     norm_cnt[1] = last_count;
1630     norm_cnt[2] = 0;
1631     norm_cnt[3] = arf_count;
1632     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1633     mod_refprobs[2] = 0;  // This branch implicit
1634   } else {
1635     norm_cnt[0] = intra_count;
1636     norm_cnt[1] = last_count;
1637     norm_cnt[2] = gf_count;
1638     norm_cnt[3] = 0;
1639     vp9_calc_ref_probs(norm_cnt, mod_refprobs);
1640     mod_refprobs[2] = 0;  // This branch implicit
1641   }
1642 }
1643
1644 static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1,
1645                                      int idx, int val, int weight) {
1646   unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0;
1647   unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0;
1648   // weight is 16-bit fixed point, so this basically calculates:
1649   // 0.5 + weight * cost1 + (1.0 - weight) * cost0
1650   return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16;
1651 }
1652
1653 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
1654                                      unsigned int *ref_costs) {
1655   VP9_COMMON *cm = &cpi->common;
1656   MACROBLOCKD *xd = &cpi->mb.e_mbd;
1657   vp9_prob *mod_refprobs;
1658
1659   unsigned int cost;
1660   int pred_ref;
1661   int pred_flag;
1662   int pred_ctx;
1663   int i;
1664
1665   vp9_prob pred_prob, new_pred_prob;
1666   int seg_ref_active;
1667   int seg_ref_count = 0;
1668   seg_ref_active = vp9_segfeature_active(xd,
1669                                          segment_id,
1670                                          SEG_LVL_REF_FRAME);
1671
1672   if (seg_ref_active) {
1673     seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME)  +
1674                     vp9_check_segref(xd, segment_id, LAST_FRAME)   +
1675                     vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
1676                     vp9_check_segref(xd, segment_id, ALTREF_FRAME);
1677   }
1678
1679   // Get the predicted reference for this mb
1680   pred_ref = vp9_get_pred_ref(cm, xd);
1681
1682   // Get the context probability for the prediction flag (based on last frame)
1683   pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
1684
1685   // Predict probability for current frame based on stats so far
1686   pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF);
1687   new_pred_prob = get_binary_prob(cpi->ref_pred_count[pred_ctx][0],
1688                                   cpi->ref_pred_count[pred_ctx][1]);
1689
1690   // Get the set of probabilities to use if prediction fails
1691   mod_refprobs = cm->mod_refprobs[pred_ref];
1692
1693   // For each possible selected reference frame work out a cost.
1694   for (i = 0; i < MAX_REF_FRAMES; i++) {
1695     if (seg_ref_active && seg_ref_count == 1) {
1696       cost = 0;
1697     } else {
1698       pred_flag = (i == pred_ref);
1699
1700       // Get the prediction for the current mb
1701       cost = weighted_cost(&pred_prob, &new_pred_prob, 0,
1702                            pred_flag, cpi->seg0_progress);
1703       if (cost > 1024) cost = 768;  // i.e. account for 4 bits max.
1704
1705       // for incorrectly predicted cases
1706       if (!pred_flag) {
1707         vp9_prob curframe_mod_refprobs[3];
1708
1709         if (cpi->seg0_progress) {
1710           estimate_curframe_refprobs(cpi, curframe_mod_refprobs, pred_ref);
1711         } else {
1712           vpx_memset(curframe_mod_refprobs, 0, sizeof(curframe_mod_refprobs));
1713         }
1714
1715         cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 0,
1716                               (i != INTRA_FRAME), cpi->seg0_progress);
1717         if (i != INTRA_FRAME) {
1718           cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 1,
1719                                 (i != LAST_FRAME), cpi->seg0_progress);
1720           if (i != LAST_FRAME) {
1721             cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 2,
1722                                   (i != GOLDEN_FRAME), cpi->seg0_progress);
1723           }
1724         }
1725       }
1726     }
1727
1728     ref_costs[i] = cost;
1729   }
1730 }
1731
1732 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1733                                  int mode_index,
1734                                  PARTITION_INFO *partition,
1735                                  int_mv *ref_mv,
1736                                  int_mv *second_ref_mv,
1737                                  int64_t comp_pred_diff[NB_PREDICTION_TYPES],
1738                                  int64_t txfm_size_diff[NB_TXFM_MODES]) {
1739   MACROBLOCKD *const xd = &x->e_mbd;
1740
1741   // Take a snapshot of the coding context so it can be
1742   // restored if we decide to encode this way
1743   ctx->skip = x->skip;
1744   ctx->best_mode_index = mode_index;
1745   ctx->mic = *xd->mode_info_context;
1746
1747   if (partition)
1748     ctx->partition_info = *partition;
1749
1750   ctx->best_ref_mv.as_int = ref_mv->as_int;
1751   ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
1752
1753   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
1754   ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
1755   ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
1756
1757   memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
1758 }
1759
1760 static void setup_pred_block(const MACROBLOCKD *xd,
1761                              struct buf_2d dst[MAX_MB_PLANE],
1762                              const YV12_BUFFER_CONFIG *src,
1763                              int mi_row, int mi_col,
1764                              const struct scale_factors *scale,
1765                              const struct scale_factors *scale_uv) {
1766   int i;
1767
1768   dst[0].buf = src->y_buffer;
1769   dst[0].stride = src->y_stride;
1770   dst[1].buf = src->u_buffer;
1771   dst[2].buf = src->v_buffer;
1772   dst[1].stride = dst[2].stride = src->uv_stride;
1773 #if CONFIG_ALPHA
1774   dst[3].buf = src->alpha_buffer;
1775   dst[3].stride = src->alpha_stride;
1776 #endif
1777
1778   // TODO(jkoleszar): Make scale factors per-plane data
1779   for (i = 0; i < MAX_MB_PLANE; i++) {
1780     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
1781                      i ? scale_uv : scale,
1782                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1783   }
1784 }
1785
1786 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
1787                                int idx, MV_REFERENCE_FRAME frame_type,
1788                                enum BlockSize block_size,
1789                                int mi_row, int mi_col,
1790                                int_mv frame_nearest_mv[MAX_REF_FRAMES],
1791                                int_mv frame_near_mv[MAX_REF_FRAMES],
1792                                struct buf_2d yv12_mb[4][MAX_MB_PLANE],
1793                                struct scale_factors scale[MAX_REF_FRAMES]) {
1794   VP9_COMMON *cm = &cpi->common;
1795   YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
1796   MACROBLOCKD *const xd = &x->e_mbd;
1797   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1798
1799   // set up scaling factors
1800   scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
1801   scale[frame_type].x_offset_q4 =
1802       (mi_col * MI_SIZE * scale[frame_type].x_num /
1803        scale[frame_type].x_den) & 0xf;
1804   scale[frame_type].y_offset_q4 =
1805       (mi_row * MI_SIZE * scale[frame_type].y_num /
1806        scale[frame_type].y_den) & 0xf;
1807
1808   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
1809   // use the UV scaling factors.
1810   setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
1811                    &scale[frame_type], &scale[frame_type]);
1812
1813   // Gets an initial list of candidate vectors from neighbours and orders them
1814   vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
1815                    xd->prev_mode_info_context,
1816                    frame_type,
1817                    mbmi->ref_mvs[frame_type],
1818                    cpi->common.ref_frame_sign_bias);
1819
1820   // Candidate refinement carried out at encoder and decoder
1821   vp9_find_best_ref_mvs(xd,
1822                         mbmi->ref_mvs[frame_type],
1823                         &frame_nearest_mv[frame_type],
1824                         &frame_near_mv[frame_type]);
1825
1826   // Further refinement that is encode side only to test the top few candidates
1827   // in full and choose the best as the centre point for subsequent searches.
1828   // The current implementation doesn't support scaling.
1829   if (scale[frame_type].x_num == scale[frame_type].x_den &&
1830       scale[frame_type].y_num == scale[frame_type].y_den)
1831     mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
1832             frame_type, block_size);
1833 }
1834
// Models the rate and distortion of a Laplacian source with the given
// variance when quantized with a uniform quantizer of the given step size.
// The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
// The function is implemented as piecewise approximation to the
// exact computation.
//
//   var    summed variance over the block
//   n      number of samples in the block
//   qstep  quantizer step size
//   rate   out: modelled rate, scaled by 256
//   dist   out: modelled distortion
//
// A zero variance or an empty block yields rate 0 and distortion 0 without
// touching the floating-point model, which would otherwise divide by zero.
//
// TODO(debargha): Implement the functions by interpolating from a
// look-up table
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int *dist) {
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    // s2 = var / n and x = qstep / sqrt(s2) are undefined or infinite here;
    // for qstep > 0 the model below already collapses to this result.
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;
    double s = sqrt(s2);
    // NOTE(review): qstep == 0 with var > 0 gives x == 0 and an infinite R
    // from log(0) below; callers are presumably expected to pass qstep > 0.
    double x = qstep / s;
    if (x > 1.0) {
      double y = exp(-x / 2);
      double y2 = y * y;
      D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
      R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
    } else {
      double x2 = x * x;
      D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
      if (x > 0.125)
        R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
                 0.1626989668625);
      else
        R = -1.442252874826093 * log(x) + 1.944647760719664;
    }
    if (R < 0) {
      // The approximation went negative: treat the block as uncoded.
      *rate = 0;
      *dist = var;
    } else {
      *rate = (n * R * 256 + 0.5);
      *dist = (n * D * s2 + 0.5);
    }
  }
  vp9_clear_system_state();
}
1877
1878 static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
1879                                            struct macroblockd_plane *pd) {
1880   return get_block_size(plane_block_width(bsize, pd),
1881                         plane_block_height(bsize, pd));
1882 }
1883
1884 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
1885                             MACROBLOCK *x, MACROBLOCKD *xd,
1886                             int *out_rate_sum, int *out_dist_sum) {
1887   // Note our transform coeffs are 8 times an orthogonal transform.
1888   // Hence quantizer step is also 8 times. To get effective quantizer
1889   // we need to divide by 8 before sending to modeling function.
1890   unsigned int sse, var;
1891   int i, rate_sum = 0, dist_sum = 0;
1892
1893   for (i = 0; i < MAX_MB_PLANE; ++i) {
1894     struct macroblock_plane *const p = &x->plane[i];
1895     struct macroblockd_plane *const pd = &xd->plane[i];
1896
1897     // TODO(dkovalev) the same code in get_plane_block_size
1898     const int bw = plane_block_width(bsize, pd);
1899     const int bh = plane_block_height(bsize, pd);
1900     const enum BlockSize bs = get_block_size(bw, bh);
1901     int rate, dist;
1902     var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
1903                              pd->dst.buf, pd->dst.stride, &sse);
1904     model_rd_from_var_lapndz(var, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
1905
1906     rate_sum += rate;
1907     dist_sum += dist;
1908   }
1909
1910   *out_rate_sum = rate_sum;
1911   *out_dist_sum = dist_sum;
1912 }
1913
1914 static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
1915   MACROBLOCKD *xd = &x->e_mbd;
1916   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1917
1918   const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
1919   const int m = vp9_switchable_interp_map[mbmi->interp_filter];
1920   return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
1921 }
1922
1923 static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1924                                     BLOCK_SIZE_TYPE bsize,
1925                                     int_mv *frame_mv,
1926                                     YV12_BUFFER_CONFIG **scaled_ref_frame,
1927                                     int mi_row, int mi_col,
1928                                     int_mv single_newmv[MAX_REF_FRAMES]) {
1929   int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
1930   MACROBLOCKD *xd = &x->e_mbd;
1931   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
1932   int refs[2] = { mbmi->ref_frame,
1933                   (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };
1934   int_mv ref_mv[2];
1935   const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
1936   int ite;
1937   // Prediction buffer from second frame.
1938   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
1939
1940   // Do joint motion search in compound mode to get more accurate mv.
1941   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
1942   struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
1943   struct buf_2d scaled_first_yv12;
1944   int last_besterr[2] = {INT_MAX, INT_MAX};
1945
1946   ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
1947   ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
1948
1949   if (scaled_ref_frame[0]) {
1950     int i;
1951     // Swap out the reference frame for a version that's been scaled to
1952     // match the resolution of the current frame, allowing the existing
1953     // motion search code to be used without additional modifications.
1954     for (i = 0; i < MAX_MB_PLANE; i++)
1955       backup_yv12[i] = xd->plane[i].pre[0];
1956     setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
1957                      NULL, NULL);
1958   }
1959
1960   if (scaled_ref_frame[1]) {
1961     int i;
1962     for (i = 0; i < MAX_MB_PLANE; i++)
1963       backup_second_yv12[i] = xd->plane[i].pre[1];
1964
1965     setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
1966                      NULL, NULL);
1967   }
1968
1969   xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
1970                                           mi_row, mi_col);
1971   xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
1972                                           mi_row, mi_col);
1973   scaled_first_yv12 = xd->plane[0].pre[0];
1974
1975   // Initialize mv using single prediction mode result.
1976   frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
1977   frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
1978
1979   // Allow joint search multiple times iteratively for each ref frame
1980   // and break out the search loop if it couldn't find better mv.
1981   for (ite = 0; ite < 4; ite++) {
1982     struct buf_2d ref_yv12[2];
1983     int bestsme = INT_MAX;
1984     int sadpb = x->sadperbit16;
1985     int_mv tmp_mv;
1986     int search_range = 3;
1987
1988     int tmp_col_min = x->mv_col_min;
1989     int tmp_col_max = x->mv_col_max;
1990     int tmp_row_min = x->mv_row_min;
1991     int tmp_row_max = x->mv_row_max;
1992     int id = ite % 2;
1993
1994     // Initialized here because of compiler problem in Visual Studio.
1995     ref_yv12[0] = xd->plane[0].pre[0];
1996     ref_yv12[1] = xd->plane[0].pre[1];
1997
1998     // Get pred block from second frame.
1999     vp9_build_inter_predictor(ref_yv12[!id].buf,
2000                               ref_yv12[!id].stride,
2001                               second_pred, pw,
2002                               &frame_mv[refs[!id]],
2003                               &xd->scale_factor[!id],
2004                               pw, ph, 0,
2005                               &xd->subpix);
2006
2007     // Compound motion search on first ref frame.
2008     if (id)
2009       xd->plane[0].pre[0] = ref_yv12[id];
2010     vp9_clamp_mv_min_max(x, &ref_mv[id]);
2011
2012     // Use mv result from single mode as mvp.
2013     tmp_mv.as_int = frame_mv[refs[id]].as_int;
2014
2015     tmp_mv.as_mv.col >>= 3;
2016     tmp_mv.as_mv.row >>= 3;
2017
2018     // Small-range full-pixel motion search
2019     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2020                                        search_range,
2021                                        &cpi->fn_ptr[block_size],
2022                                        x->nmvjointcost, x->mvcost,
2023                                        &ref_mv[id], second_pred,
2024                                        pw, ph);
2025
2026     x->mv_col_min = tmp_col_min;
2027     x->mv_col_max = tmp_col_max;
2028     x->mv_row_min = tmp_row_min;
2029     x->mv_row_max = tmp_row_max;
2030
2031     if (bestsme < INT_MAX) {
2032       int dis; /* TODO: use dis in distortion calculation later. */
2033       unsigned int sse;
2034
2035       bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
2036                                              &ref_mv[id],
2037                                              x->errorperbit,
2038                                              &cpi->fn_ptr[block_size],
2039                                              x->nmvjointcost, x->mvcost,
2040                                              &dis, &sse, second_pred,
2041                                              pw, ph);
2042     }
2043
2044     if (id)
2045       xd->plane[0].pre[0] = scaled_first_yv12;
2046
2047     if (bestsme < last_besterr[id]) {
2048       frame_mv[refs[id]].as_int = tmp_mv.as_int;
2049       last_besterr[id] = bestsme;
2050     } else {
2051       break;
2052     }
2053   }
2054
2055   // restore the predictor
2056   if (scaled_ref_frame[0]) {
2057     int i;
2058     for (i = 0; i < MAX_MB_PLANE; i++)
2059       xd->plane[i].pre[0] = backup_yv12[i];
2060   }
2061
2062   if (scaled_ref_frame[1]) {
2063     int i;
2064     for (i = 0; i < MAX_MB_PLANE; i++)
2065       xd->plane[i].pre[1] = backup_second_yv12[i];
2066   }
2067
2068   vpx_free(second_pred);
2069 }
2070
2071 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2072                                  BLOCK_SIZE_TYPE bsize,
2073                                  int64_t txfm_cache[],
2074                                  int *rate2, int *distortion, int *skippable,
2075                                  int *compmode_cost,
2076                                  int *rate_y, int *distortion_y,
2077                                  int *rate_uv, int *distortion_uv,
2078                                  int *mode_excluded, int *disable_skip,
2079                                  INTERPOLATIONFILTERTYPE *best_filter,
2080                                  int_mv *frame_mv,
2081                                  YV12_BUFFER_CONFIG **scaled_ref_frame,
2082                                  int mi_row, int mi_col,
2083                                  int_mv single_newmv[MAX_REF_FRAMES]) {
2084   const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
2085
2086   VP9_COMMON *cm = &cpi->common;
2087   MACROBLOCKD *xd = &x->e_mbd;
2088   const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
2089   const enum BlockSize uv_block_size = get_plane_block_size(bsize,
2090                                                             &xd->plane[1]);
2091   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2092   const int is_comp_pred = (mbmi->second_ref_frame > 0);
2093   const int num_refs = is_comp_pred ? 2 : 1;
2094   const int this_mode = mbmi->mode;
2095   int i;
2096   int refs[2] = { mbmi->ref_frame,
2097                   (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };
2098   int_mv cur_mv[2];
2099   int_mv ref_mv[2];
2100   int64_t this_rd = 0;
2101   unsigned char tmp_buf[MAX_MB_PLANE][64 * 64];
2102   int pred_exists = 0;
2103   int interpolating_intpel_seen = 0;
2104   int intpel_mv;
2105   int64_t rd, best_rd = INT64_MAX;
2106
2107   switch (this_mode) {
2108     case NEWMV:
2109       ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
2110       ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
2111
2112       if (is_comp_pred) {
2113         // Initialize mv using single prediction mode result.
2114         frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2115         frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2116
2117         if (cpi->sf.comp_inter_joint_search_thresh < bsize)
2118           iterative_motion_search(cpi, x, bsize, frame_mv, scaled_ref_frame,
2119                                   mi_row, mi_col, single_newmv);
2120
2121         if (frame_mv[refs[0]].as_int == INVALID_MV ||
2122             frame_mv[refs[1]].as_int == INVALID_MV)
2123           return INT64_MAX;
2124         *rate2 += vp9_mv_bit_cost(&frame_mv[refs[0]],
2125                                   &ref_mv[0],
2126                                   x->nmvjointcost, x->mvcost, 96,
2127                                   x->e_mbd.allow_high_precision_mv);
2128         *rate2 += vp9_mv_bit_cost(&frame_mv[refs[1]],
2129                                   &ref_mv[1],
2130                                   x->nmvjointcost, x->mvcost, 96,
2131                                   x->e_mbd.allow_high_precision_mv);
2132       } else {
2133         struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2134         int bestsme = INT_MAX;
2135         int further_steps, step_param = cpi->sf.first_step;
2136         int sadpb = x->sadperbit16;
2137         int_mv mvp_full, tmp_mv;
2138         int sr = 0;
2139
2140         int tmp_col_min = x->mv_col_min;
2141         int tmp_col_max = x->mv_col_max;
2142         int tmp_row_min = x->mv_row_min;
2143         int tmp_row_max = x->mv_row_max;
2144
2145         if (scaled_ref_frame[0]) {
2146           int i;
2147
2148           // Swap out the reference frame for a version that's been scaled to
2149           // match the resolution of the current frame, allowing the existing
2150           // motion search code to be used without additional modifications.
2151           for (i = 0; i < MAX_MB_PLANE; i++)
2152             backup_yv12[i] = xd->plane[i].pre[0];
2153
2154           setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
2155                            NULL, NULL);
2156         }
2157
2158         vp9_clamp_mv_min_max(x, &ref_mv[0]);
2159
2160         sr = vp9_init_search_range(cpi->common.width, cpi->common.height);
2161
2162         // mvp_full.as_int = ref_mv[0].as_int;
2163         mvp_full.as_int =
2164          mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int;
2165
2166         mvp_full.as_mv.col >>= 3;
2167         mvp_full.as_mv.row >>= 3;
2168
2169         // adjust search range according to sr from mv prediction
2170         step_param = MAX(step_param, sr);
2171
2172         // Further step/diamond searches as necessary
2173         further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2174
2175         bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2176                                          sadpb, further_steps, 1,
2177                                          &cpi->fn_ptr[block_size],
2178                                          &ref_mv[0], &tmp_mv);
2179
2180         x->mv_col_min = tmp_col_min;
2181         x->mv_col_max = tmp_col_max;
2182         x->mv_row_min = tmp_row_min;
2183         x->mv_row_max = tmp_row_max;
2184
2185         if (bestsme < INT_MAX) {
2186           int dis; /* TODO: use dis in distortion calculation later. */
2187           unsigned int sse;
2188           cpi->find_fractional_mv_step(x, &tmp_mv,
2189                                        &ref_mv[0],
2190                                        x->errorperbit,
2191                                        &cpi->fn_ptr[block_size],
2192                                        x->nmvjointcost, x->mvcost,
2193                                        &dis, &sse);
2194         }
2195         frame_mv[refs[0]].as_int = tmp_mv.as_int;
2196         single_newmv[refs[0]].as_int = tmp_mv.as_int;
2197
2198         // Add the new motion vector cost to our rolling cost variable
2199         *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
2200                                   x->nmvjointcost, x->mvcost,
2201                                   96, xd->allow_high_precision_mv);
2202
2203         // restore the predictor, if required
2204         if (scaled_ref_frame[0]) {
2205           int i;
2206
2207           for (i = 0; i < MAX_MB_PLANE; i++)
2208             xd->plane[i].pre[0] = backup_yv12[i];
2209         }
2210       }
2211       break;
2212     case NEARMV:
2213     case NEARESTMV:
2214     case ZEROMV:
2215     default:
2216       break;
2217   }
2218   for (i = 0; i < num_refs; ++i) {
2219     cur_mv[i] = frame_mv[refs[i]];
2220     // Clip "next_nearest" so that it does not extend too far out of image
2221     if (this_mode == NEWMV)
2222       assert(!clamp_mv2(&cur_mv[i], xd));
2223     else
2224       clamp_mv2(&cur_mv[i], xd);
2225
2226     if (mv_check_bounds(x, &cur_mv[i]))
2227       return INT64_MAX;
2228     mbmi->mv[i].as_int = cur_mv[i].as_int;
2229   }
2230
2231
2232   /* We don't include the cost of the second reference here, because there
2233    * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2234    * words if you present them in that order, the second one is always known
2235    * if the first is known */
2236   *compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP),
2237                                 is_comp_pred);
2238   *rate2 += vp9_cost_mv_ref(cpi, this_mode,
2239                             mbmi->mb_mode_context[mbmi->ref_frame]);
2240
2241   pred_exists = 0;
2242   interpolating_intpel_seen = 0;
2243   // Are all MVs integer pel for Y and UV
2244   intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2245               (mbmi->mv[0].as_mv.col & 15) == 0;
2246   if (is_comp_pred)
2247     intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2248                  (mbmi->mv[1].as_mv.col & 15) == 0;
2249   // Search for best switchable filter by checking the variance of
2250   // pred error irrespective of whether the filter will be used
2251   if (cpi->speed > 4) {
2252     *best_filter = EIGHTTAP;
2253   } else {
2254     int i, newbest;
2255     int tmp_rate_sum = 0, tmp_dist_sum = 0;
2256     for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
2257       int rs = 0;
2258       const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
2259       const int is_intpel_interp = intpel_mv &&
2260                                    vp9_is_interpolating_filter[filter];
2261       mbmi->interp_filter = filter;
2262       vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2263
2264       if (cm->mcomp_filter_type == SWITCHABLE)
2265         rs = get_switchable_rate(cm, x);
2266
2267       if (interpolating_intpel_seen && is_intpel_interp) {
2268         rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
2269       } else {
2270         int rate_sum = 0, dist_sum = 0;
2271         vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2272         model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2273         rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
2274         if (!interpolating_intpel_seen && is_intpel_interp) {
2275           tmp_rate_sum = rate_sum;
2276           tmp_dist_sum = dist_sum;
2277         }
2278       }
2279       newbest = i == 0 || rd < best_rd;
2280
2281       if (newbest) {
2282         best_rd = rd;
2283         *best_filter = mbmi->interp_filter;
2284       }
2285
2286       if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2287           (cm->mcomp_filter_type != SWITCHABLE &&
2288            cm->mcomp_filter_type == mbmi->interp_filter)) {
2289         int p;
2290
2291         for (p = 0; p < MAX_MB_PLANE; p++) {
2292           const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
2293           const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
2294           int i;
2295
2296           for (i = 0; i < y; i++)
2297             vpx_memcpy(&tmp_buf[p][64 * i],
2298                        xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x);
2299         }
2300         pred_exists = 1;
2301       }
2302       interpolating_intpel_seen |= is_intpel_interp;
2303     }
2304   }
2305
2306   // Set the appropriate filter
2307   mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2308                              cm->mcomp_filter_type : *best_filter;
2309   vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2310
2311
2312   if (pred_exists) {
2313     int p;
2314
2315     for (p = 0; p < MAX_MB_PLANE; p++) {
2316       const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
2317       const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
2318       int i;
2319
2320       for (i = 0; i < y; i++)
2321         vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
2322                    &tmp_buf[p][64 * i], x);
2323     }
2324   } else {
2325     // Handles the special case when a filter that is not in the
2326     // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2327     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2328   }
2329
2330   if (cpi->common.mcomp_filter_type == SWITCHABLE)
2331     *rate2 += get_switchable_rate(cm, x);
2332
2333   if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2334     x->skip = 1;
2335   else if (x->encode_breakout) {
2336     unsigned int var, sse;
2337     int threshold = (xd->plane[0].dequant[1]
2338                      * xd->plane[0].dequant[1] >> 4);
2339
2340     if (threshold < x->encode_breakout)
2341       threshold = x->encode_breakout;
2342
2343     var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
2344                                      x->plane[0].src.stride,
2345                                      xd->plane[0].dst.buf,
2346                                      xd->plane[0].dst.stride,
2347                                      &sse);
2348
2349     if ((int)sse < threshold) {
2350       unsigned int q2dc = xd->plane[0].dequant[0];
2351       /* If there is no codeable 2nd order dc
2352          or a very small uniform pixel change */
2353       if ((sse - var < q2dc * q2dc >> 4) ||
2354           (sse / 2 > var && sse - var < 64)) {
2355         // Check u and v to make sure skip is ok
2356         int sse2;
2357         unsigned int sse2u, sse2v;
2358         var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
2359                                             x->plane[1].src.stride,
2360                                             xd->plane[1].dst.buf,
2361                                             xd->plane[1].dst.stride, &sse2u);
2362         var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
2363                                             x->plane[1].src.stride,
2364                                             xd->plane[2].dst.buf,
2365                                             xd->plane[1].dst.stride, &sse2v);
2366         sse2 = sse2u + sse2v;
2367
2368         if (sse2 * 2 < threshold) {
2369           x->skip = 1;
2370           *distortion = sse + sse2;
2371           *rate2 = 500;
2372
2373           /* for best_yrd calculation */
2374           *rate_uv = 0;
2375           *distortion_uv = sse2;
2376
2377           *disable_skip = 1;
2378           this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2379         }
2380       }
2381     }
2382   }
2383
2384   if (!x->skip) {
2385     int skippable_y, skippable_uv;
2386
2387     // Y cost and distortion
2388     super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
2389                     bsize, txfm_cache);
2390
2391     *rate2 += *rate_y;
2392     *distortion += *distortion_y;
2393
2394     super_block_uvrd(cm, x, rate_uv, distortion_uv,
2395                      &skippable_uv, bsize);
2396
2397     *rate2 += *rate_uv;
2398     *distortion += *distortion_uv;
2399     *skippable = skippable_y && skippable_uv;
2400   }
2401
2402   if (!(*mode_excluded)) {
2403     if (is_comp_pred) {
2404       *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2405     } else {
2406       *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2407     }
2408   }
2409
2410   return this_rd;  // if 0, this will be re-calculated by caller
2411 }
2412
2413 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2414                                int *returnrate, int *returndist,
2415                                BLOCK_SIZE_TYPE bsize,
2416                                PICK_MODE_CONTEXT *ctx) {
2417   VP9_COMMON *cm = &cpi->common;
2418   MACROBLOCKD *xd = &x->e_mbd;
2419   int rate_y = 0, rate_uv;
2420   int rate_y_tokenonly = 0, rate_uv_tokenonly;
2421   int dist_y = 0, dist_uv;
2422   int y_skip = 0, uv_skip;
2423   int64_t txfm_cache[NB_TXFM_MODES], err;
2424   MB_PREDICTION_MODE mode;
2425   TX_SIZE txfm_size;
2426   int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y;
2427   int64_t err4x4 = INT64_MAX;
2428   int i;
2429
2430   vpx_memset(&txfm_cache,0,sizeof(txfm_cache));
2431   ctx->skip = 0;
2432   xd->mode_info_context->mbmi.mode = DC_PRED;
2433   xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2434   err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2435                                &dist_y, &y_skip, bsize, txfm_cache);
2436   mode = xd->mode_info_context->mbmi.mode;
2437   txfm_size = xd->mode_info_context->mbmi.txfm_size;
2438   rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
2439                           &dist_uv, &uv_skip,
2440                           (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
2441                                                        bsize);
2442   if (bsize < BLOCK_SIZE_SB8X8)
2443     err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
2444                                        &rate4x4_y_tokenonly,
2445                                        &dist4x4_y, err);
2446
2447   if (y_skip && uv_skip) {
2448     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2449                   vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
2450     *returndist = dist_y + (dist_uv >> 2);
2451     memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
2452     xd->mode_info_context->mbmi.mode = mode;
2453     xd->mode_info_context->mbmi.txfm_size = txfm_size;
2454   } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) {
2455     *returnrate = rate4x4_y + rate_uv +
2456         vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
2457     *returndist = dist4x4_y + (dist_uv >> 2);
2458     vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
2459     xd->mode_info_context->mbmi.txfm_size = TX_4X4;
2460   } else {
2461     *returnrate = rate_y + rate_uv +
2462         vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
2463     *returndist = dist_y + (dist_uv >> 2);
2464     for (i = 0; i < NB_TXFM_MODES; i++) {
2465       ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode];
2466     }
2467     xd->mode_info_context->mbmi.txfm_size = txfm_size;
2468     xd->mode_info_context->mbmi.mode = mode;
2469   }
2470
2471   ctx->mic = *xd->mode_info_context;
2472 }
2473
2474 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2475                                   int mi_row, int mi_col,
2476                                   int *returnrate,
2477                                   int *returndistortion,
2478                                   BLOCK_SIZE_TYPE bsize,
2479                                   PICK_MODE_CONTEXT *ctx) {
2480   VP9_COMMON *cm = &cpi->common;
2481   MACROBLOCKD *xd = &x->e_mbd;
2482   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2483   const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
2484   MB_PREDICTION_MODE this_mode;
2485   MB_PREDICTION_MODE best_mode = DC_PRED;
2486   MV_REFERENCE_FRAME ref_frame, second_ref = INTRA_FRAME;
2487   unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
2488   int comp_pred, i;
2489   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2490   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
2491   int_mv single_newmv[MAX_REF_FRAMES];
2492   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
2493                                     VP9_ALT_FLAG };
2494   int idx_list[4] = {0,
2495                      cpi->lst_fb_idx,
2496                      cpi->gld_fb_idx,
2497                      cpi->alt_fb_idx};
2498   int64_t best_rd = INT64_MAX;
2499   int64_t best_txfm_rd[NB_TXFM_MODES];
2500   int64_t best_txfm_diff[NB_TXFM_MODES];
2501   int64_t best_pred_diff[NB_PREDICTION_TYPES];
2502   int64_t best_pred_rd[NB_PREDICTION_TYPES];
2503   MB_MODE_INFO best_mbmode;
2504   int j;
2505   int mode_index, best_mode_index = 0;
2506   unsigned int ref_costs[MAX_REF_FRAMES];
2507   int64_t best_overall_rd = INT64_MAX;
2508   INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
2509   INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
2510   int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
2511   int dist_uv[TX_SIZE_MAX_SB], skip_uv[TX_SIZE_MAX_SB];
2512   MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
2513   struct scale_factors scale_factor[4];
2514   unsigned int ref_frame_mask = 0;
2515   unsigned int mode_mask = 0;
2516   int64_t mode_distortions[MB_MODE_COUNT] = {-1};
2517   int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
2518   int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
2519                                              cpi->common.y_dc_delta_q);
2520   int_mv seg_mvs[4][MAX_REF_FRAMES];
2521   union b_mode_info best_bmodes[4];
2522   PARTITION_INFO best_partition;
2523   int bwsl = b_width_log2(bsize);
2524   int bws = (1 << bwsl) / 4;  // mode_info step for subsize
2525   int bhsl = b_width_log2(bsize);
2526   int bhs = (1 << bhsl) / 4;  // mode_info step for subsize
2527
2528   for (i = 0; i < 4; i++) {
2529     int j;
2530
2531     for (j = 0; j < MAX_REF_FRAMES; j++)
2532       seg_mvs[i][j].as_int = INVALID_MV;
2533   }
2534   // Everywhere the flag is set the error is much higher than its neighbors.
2535   ctx->frames_with_high_error = 0;
2536   ctx->modes_with_high_error = 0;
2537
2538   xd->mode_info_context->mbmi.segment_id = segment_id;
2539   estimate_ref_frame_costs(cpi, segment_id, ref_costs);
2540   vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
2541   vpx_memset(&single_newmv, 0, sizeof(single_newmv));
2542
2543   for (i = 0; i < NB_PREDICTION_TYPES; ++i)
2544     best_pred_rd[i] = INT64_MAX;
2545   for (i = 0; i < NB_TXFM_MODES; i++)
2546     best_txfm_rd[i] = INT64_MAX;
2547
2548   // Create a mask set to 1 for each frame used by a smaller resolution.
2549   if (cpi->speed > 0) {
2550     switch (block_size) {
2551       case BLOCK_64X64:
2552         for (i = 0; i < 4; i++) {
2553           for (j = 0; j < 4; j++) {
2554             ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
2555             mode_mask |= x->mb_context[i][j].modes_with_high_error;
2556           }
2557         }
2558         for (i = 0; i < 4; i++) {
2559           ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
2560           mode_mask |= x->sb32_context[i].modes_with_high_error;
2561         }
2562         break;
2563       case BLOCK_32X32:
2564         for (i = 0; i < 4; i++) {
2565           ref_frame_mask |=
2566               x->mb_context[xd->sb_index][i].frames_with_high_error;
2567           mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
2568         }
2569         break;
2570       default:
2571         // Until we handle all block sizes set it to present;
2572         ref_frame_mask = 0;
2573         mode_mask = 0;
2574         break;
2575     }
2576     ref_frame_mask = ~ref_frame_mask;
2577     mode_mask = ~mode_mask;
2578   }
2579
2580   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
2581     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
2582       setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
2583                          mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
2584                          yv12_mb, scale_factor);
2585     }
2586     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
2587     frame_mv[ZEROMV][ref_frame].as_int = 0;
2588   }
2589   if (cpi->speed == 0
2590       || (cpi->speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
2591     mbmi->mode = DC_PRED;
2592     mbmi->ref_frame = INTRA_FRAME;
2593     for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
2594                       (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
2595                        (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32)));
2596          i++) {
2597       mbmi->txfm_size = i;
2598       rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i],
2599                               &dist_uv[i], &skip_uv[i],
2600                               (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
2601                                                            bsize);
2602       mode_uv[i] = mbmi->uv_mode;
2603     }
2604   }
2605
2606   for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
2607     int mode_excluded = 0;
2608     int64_t this_rd = INT64_MAX;
2609     int disable_skip = 0;
2610     int other_cost = 0;
2611     int compmode_cost = 0;
2612     int rate2 = 0, rate_y = 0, rate_uv = 0;
2613     int distortion2 = 0, distortion_y = 0, distortion_uv = 0;
2614     int skippable;
2615     int64_t txfm_cache[NB_TXFM_MODES];
2616     int i;
2617
2618     for (i = 0; i < NB_TXFM_MODES; ++i)
2619       txfm_cache[i] = INT64_MAX;
2620
2621     // Test best rd so far against threshold for trying this mode.
2622     if (bsize >= BLOCK_SIZE_SB8X8 &&
2623         (best_rd < cpi->rd_threshes[mode_index] ||
2624          cpi->rd_threshes[mode_index] == INT_MAX))
2625       continue;
2626
2627     x->skip = 0;
2628     this_mode = vp9_mode_order[mode_index].mode;
2629     ref_frame = vp9_mode_order[mode_index].ref_frame;
2630
2631     if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
2632       if (!(ref_frame_mask & (1 << ref_frame))) {
2633         continue;
2634       }
2635       if (!(mode_mask & (1 << this_mode))) {
2636         continue;
2637       }
2638       if (vp9_mode_order[mode_index].second_ref_frame != NONE
2639           && !(ref_frame_mask
2640               & (1 << vp9_mode_order[mode_index].second_ref_frame))) {
2641         continue;
2642       }
2643     }
2644
2645     mbmi->ref_frame = ref_frame;
2646     mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
2647
2648     if (!(ref_frame == INTRA_FRAME
2649         || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
2650       continue;
2651     }
2652     if (!(mbmi->second_ref_frame == NONE
2653         || (cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))) {
2654       continue;
2655     }
2656
2657     // TODO(jingning, jkoleszar): scaling reference frame not supported for
2658     // SPLITMV.
2659     if (mbmi->ref_frame > 0 &&
2660           (scale_factor[mbmi->ref_frame].x_num !=
2661            scale_factor[mbmi->ref_frame].x_den ||
2662            scale_factor[mbmi->ref_frame].y_num !=
2663            scale_factor[mbmi->ref_frame].y_den) &&
2664         this_mode == SPLITMV)
2665       continue;
2666
2667     if (mbmi->second_ref_frame > 0 &&
2668           (scale_factor[mbmi->second_ref_frame].x_num !=
2669            scale_factor[mbmi->second_ref_frame].x_den ||
2670            scale_factor[mbmi->second_ref_frame].y_num !=
2671            scale_factor[mbmi->second_ref_frame].y_den) &&
2672         this_mode == SPLITMV)
2673       continue;
2674
2675     set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
2676                       scale_factor);
2677     comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
2678     mbmi->mode = this_mode;
2679     mbmi->uv_mode = DC_PRED;
2680
2681     // Evaluate all sub-pel filters irrespective of whether we can use
2682     // them for this frame.
2683     mbmi->interp_filter = cm->mcomp_filter_type;
2684     vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2685
2686     if (bsize >= BLOCK_SIZE_SB8X8 &&
2687         (this_mode == I4X4_PRED || this_mode == SPLITMV))
2688       continue;
2689     if (bsize < BLOCK_SIZE_SB8X8 &&
2690         !(this_mode == I4X4_PRED || this_mode == SPLITMV))
2691       continue;
2692
2693     if (comp_pred) {
2694       if (ref_frame == ALTREF_FRAME) {
2695         second_ref = LAST_FRAME;
2696       } else {
2697         second_ref = ref_frame + 1;
2698       }
2699       if (!(cpi->ref_frame_flags & flag_list[second_ref]))
2700         continue;
2701       mbmi->second_ref_frame = second_ref;
2702       set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
2703                         scale_factor);
2704
2705       mode_excluded =
2706           mode_excluded ?
2707               mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
2708     } else {
2709       // mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
2710       if (ref_frame != INTRA_FRAME) {
2711         if (mbmi->second_ref_frame != INTRA_FRAME)
2712           mode_excluded =
2713               mode_excluded ?
2714                   mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
2715       }
2716     }
2717
2718     // Select predictors
2719     for (i = 0; i < MAX_MB_PLANE; i++) {
2720       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
2721       if (comp_pred)
2722         xd->plane[i].pre[1] = yv12_mb[second_ref][i];
2723     }
2724
2725     // If the segment reference frame feature is enabled....
2726     // then do nothing if the current ref frame is not allowed..
2727     if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
2728         !vp9_check_segref(xd, segment_id, ref_frame)) {
2729       continue;
2730     // If the segment skip feature is enabled....
2731     // then do nothing if the current mode is not allowed..
2732     } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
2733                (this_mode != ZEROMV)) {
2734       continue;
2735     // Disable this drop out case if the ref frame
2736     // segment level feature is enabled for this segment. This is to
2737     // prevent the possibility that we end up unable to pick any mode.
2738     } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
2739       // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
2740       // unless ARNR filtering is enabled in which case we want
2741       // an unfiltered alternative
2742       if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
2743         if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) {
2744           continue;
2745         }
2746       }
2747     }
2748     // TODO(JBB): This is to make up for the fact that we don't have sad
2749     // functions that work when the block size reads outside the umv.  We
2750     // should fix this either by making the motion search just work on
2751     // a representative block in the boundary ( first ) and then implement a
2752     // function that does sads when inside the border..
2753     if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
2754         this_mode == NEWMV) {
2755       continue;
2756     }
2757
2758     if (this_mode == I4X4_PRED) {
2759       int rate;
2760
2761       mbmi->txfm_size = TX_4X4;
2762       rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
2763                                 &distortion_y, INT64_MAX);
2764       rate2 += rate;
2765       rate2 += intra_cost_penalty;
2766       distortion2 += distortion_y;
2767
2768       rate2 += rate_uv_intra[TX_4X4];
2769       rate_uv = rate_uv_intra[TX_4X4];
2770       distortion2 += dist_uv[TX_4X4];
2771       distortion_uv = dist_uv[TX_4X4];
2772       mbmi->uv_mode = mode_uv[TX_4X4];
2773       txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2774       for (i = 0; i < NB_TXFM_MODES; ++i)
2775         txfm_cache[i] = txfm_cache[ONLY_4X4];
2776     } else if (ref_frame == INTRA_FRAME) {
2777       TX_SIZE uv_tx;
2778       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
2779                       bsize, txfm_cache);
2780
2781       uv_tx = mbmi->txfm_size;
2782       if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8)
2783         uv_tx = TX_4X4;
2784       if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16)
2785         uv_tx = TX_8X8;
2786       else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32)
2787         uv_tx = TX_16X16;
2788
2789       rate_uv = rate_uv_intra[uv_tx];
2790       distortion_uv = dist_uv[uv_tx];
2791       skippable = skippable && skip_uv[uv_tx];
2792       mbmi->uv_mode = mode_uv[uv_tx];
2793
2794       rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv;
2795       if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
2796         rate2 += intra_cost_penalty;
2797       distortion2 = distortion_y + distortion_uv;
2798     } else if (this_mode == SPLITMV) {
2799       const int is_comp_pred = mbmi->second_ref_frame > 0;
2800       int rate, distortion;
2801       int64_t this_rd_thresh;
2802       int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
2803       int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
2804       int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
2805       int switchable_filter_index;
2806       int_mv *second_ref = is_comp_pred ?
2807           &mbmi->ref_mvs[mbmi->second_ref_frame][0] : NULL;
2808       union b_mode_info tmp_best_bmodes[16];
2809       MB_MODE_INFO tmp_best_mbmode;
2810       PARTITION_INFO tmp_best_partition;
2811       int pred_exists = 0;
2812       int uv_skippable;
2813
2814       this_rd_thresh = (mbmi->ref_frame == LAST_FRAME) ?
2815           cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
2816       this_rd_thresh = (mbmi->ref_frame == GOLDEN_FRAME) ?
2817           cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
2818       xd->mode_info_context->mbmi.txfm_size = TX_4X4;
2819
2820       for (switchable_filter_index = 0;
2821            switchable_filter_index < VP9_SWITCHABLE_FILTERS;
2822            ++switchable_filter_index) {
2823         int newbest;
2824         mbmi->interp_filter =
2825         vp9_switchable_interp[switchable_filter_index];
2826         vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2827
2828         tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
2829                                              &mbmi->ref_mvs[mbmi->ref_frame][0],
2830                                              second_ref, INT64_MAX,
2831                                              &rate, &rate_y, &distortion,
2832                                              &skippable,
2833                                              (int)this_rd_thresh, seg_mvs,
2834                                              mi_row, mi_col);
2835         if (cpi->common.mcomp_filter_type == SWITCHABLE) {
2836           const int rs = get_switchable_rate(cm, x);
2837           tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
2838         }
2839         newbest = (tmp_rd < tmp_best_rd);
2840         if (newbest) {
2841           tmp_best_filter = mbmi->interp_filter;
2842           tmp_best_rd = tmp_rd;
2843         }
2844         if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
2845             (mbmi->interp_filter == cm->mcomp_filter_type &&
2846              cm->mcomp_filter_type != SWITCHABLE)) {
2847               tmp_best_rdu = tmp_rd;
2848               tmp_best_rate = rate;
2849               tmp_best_ratey = rate_y;
2850               tmp_best_distortion = distortion;
2851               tmp_best_skippable = skippable;
2852               tmp_best_mbmode = *mbmi;
2853               tmp_best_partition = *x->partition_info;
2854               for (i = 0; i < 4; i++)
2855                 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
2856               pred_exists = 1;
2857             }
2858       }  // switchable_filter_index loop
2859
2860       mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
2861                              tmp_best_filter : cm->mcomp_filter_type);
2862       vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2863       if (!pred_exists) {
2864         // Handles the special case when a filter that is not in the
2865         // switchable list (bilinear, 6-tap) is indicated at the frame level
2866         tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
2867                                              &mbmi->ref_mvs[mbmi->ref_frame][0],
2868                                              second_ref, INT64_MAX,
2869                                              &rate, &rate_y, &distortion,
2870                                              &skippable,
2871                                              (int)this_rd_thresh, seg_mvs,
2872                                              mi_row, mi_col);
2873       } else {
2874         if (cpi->common.mcomp_filter_type == SWITCHABLE) {
2875           int rs = get_switchable_rate(cm, x);
2876           tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
2877         }
2878         tmp_rd = tmp_best_rdu;
2879         rate = tmp_best_rate;
2880         rate_y = tmp_best_ratey;
2881         distortion = tmp_best_distortion;
2882         skippable = tmp_best_skippable;
2883         *mbmi = tmp_best_mbmode;
2884         *x->partition_info = tmp_best_partition;
2885         for (i = 0; i < 4; i++)
2886           xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
2887       }
2888
2889       rate2 += rate;
2890       distortion2 += distortion;
2891
2892       if (cpi->common.mcomp_filter_type == SWITCHABLE)
2893         rate2 += get_switchable_rate(cm, x);
2894
2895       // If even the 'Y' rd value of split is higher than best so far
2896       // then dont bother looking at UV
2897       vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
2898                                       BLOCK_SIZE_SB8X8);
2899       vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
2900       super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
2901                                 &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4);
2902       rate2 += rate_uv;
2903       distortion2 += distortion_uv;
2904       skippable = skippable && uv_skippable;
2905
2906       txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2907       for (i = 0; i < NB_TXFM_MODES; ++i)
2908         txfm_cache[i] = txfm_cache[ONLY_4X4];
2909
2910       if (!mode_excluded) {
2911         if (is_comp_pred)
2912           mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
2913         else
2914           mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
2915       }
2916
2917       compmode_cost =
2918           vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
2919     } else {
2920       YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
2921       int fb = get_ref_frame_idx(cpi, mbmi->ref_frame);
2922       if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
2923         scaled_ref_frame[0] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
2924
2925       if (comp_pred) {
2926         fb = get_ref_frame_idx(cpi, mbmi->second_ref_frame);
2927         if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
2928           scaled_ref_frame[1] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
2929       }
2930
2931       this_rd = handle_inter_mode(cpi, x, bsize,
2932                                   txfm_cache,
2933                                   &rate2, &distortion2, &skippable,
2934                                   &compmode_cost,
2935                                   &rate_y, &distortion_y,
2936                                   &rate_uv, &distortion_uv,
2937                                   &mode_excluded, &disable_skip,
2938                                   &tmp_best_filter, frame_mv[this_mode],
2939                                   scaled_ref_frame, mi_row, mi_col,
2940                                   single_newmv);
2941       if (this_rd == INT64_MAX)
2942         continue;
2943     }
2944
2945     if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
2946       rate2 += compmode_cost;
2947     }
2948
2949     // Estimate the reference frame signaling cost and add it
2950     // to the rolling cost variable.
2951     rate2 += ref_costs[xd->mode_info_context->mbmi.ref_frame];
2952
2953     if (!disable_skip) {
2954       // Test for the condition where skip block will be activated
2955       // because there are no non zero coefficients and make any
2956       // necessary adjustment for rate. Ignore if skip is coded at
2957       // segment level as the cost wont have been added in.
2958       int mb_skip_allowed;
2959
2960       // Is Mb level skip allowed (i.e. not coded at segment level).
2961       mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
2962
2963       if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
2964         // Back out the coefficient coding costs
2965         rate2 -= (rate_y + rate_uv);
2966         // for best_yrd calculation
2967         rate_uv = 0;
2968
2969         if (mb_skip_allowed) {
2970           int prob_skip_cost;
2971
2972           // Cost the skip mb case
2973           vp9_prob skip_prob =
2974             vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
2975
2976           if (skip_prob) {
2977             prob_skip_cost = vp9_cost_bit(skip_prob, 1);
2978             rate2 += prob_skip_cost;
2979             other_cost += prob_skip_cost;
2980           }
2981         }
2982       } else if (mb_skip_allowed) {
2983         // Add in the cost of the no skip flag.
2984         int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
2985                                                         PRED_MBSKIP), 0);
2986         rate2 += prob_skip_cost;
2987         other_cost += prob_skip_cost;
2988       }
2989
2990       // Calculate the final RD estimate for this mode.
2991       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2992     }
2993
2994 #if 0
2995     // Keep record of best intra distortion
2996     if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
2997         (this_rd < best_intra_rd)) {
2998       best_intra_rd = this_rd;
2999       *returnintra = distortion2;
3000     }
3001 #endif
3002
3003     if (!disable_skip && mbmi->ref_frame == INTRA_FRAME)
3004       for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3005         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3006
3007     if (this_rd < best_overall_rd) {
3008       best_overall_rd = this_rd;
3009       best_filter = tmp_best_filter;
3010       best_mode = this_mode;
3011     }
3012
3013     if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
3014       // Store the respective mode distortions for later use.
3015       if (mode_distortions[this_mode] == -1
3016           || distortion2 < mode_distortions[this_mode]) {
3017         mode_distortions[this_mode] = distortion2;
3018       }
3019       if (frame_distortions[mbmi->ref_frame] == -1
3020           || distortion2 < frame_distortions[mbmi->ref_frame]) {
3021         frame_distortions[mbmi->ref_frame] = distortion2;
3022       }
3023     }
3024
3025     // Did this mode help.. i.e. is it the new best mode
3026     if (this_rd < best_rd || x->skip) {
3027       if (!mode_excluded) {
3028         // Note index of best mode so far
3029         best_mode_index = mode_index;
3030
3031         if (ref_frame == INTRA_FRAME) {
3032           /* required for left and above block mv */
3033           mbmi->mv[0].as_int = 0;
3034         }
3035
3036         other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame];
3037         *returnrate = rate2;
3038         *returndistortion = distortion2;
3039         best_rd = this_rd;
3040         best_mbmode = *mbmi;
3041         best_partition = *x->partition_info;
3042
3043         if (this_mode == I4X4_PRED || this_mode == SPLITMV)
3044           for (i = 0; i < 4; i++)
3045             best_bmodes[i] = xd->mode_info_context->bmi[i];
3046       }
3047 #if 0
3048       // Testing this mode gave rise to an improvement in best error score.
3049       // Lower threshold a bit for next time
3050       cpi->rd_thresh_mult[mode_index] =
3051           (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
3052               cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
3053       cpi->rd_threshes[mode_index] =
3054           (cpi->rd_baseline_thresh[mode_index] >> 7)
3055               * cpi->rd_thresh_mult[mode_index];
3056 #endif
3057     } else {
3058       // If the mode did not help improve the best error case then
3059       // raise the threshold for testing that mode next time around.
3060 #if 0
3061       cpi->rd_thresh_mult[mode_index] += 4;
3062
3063       if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
3064         cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
3065
3066       cpi->rd_threshes[mode_index] =
3067           (cpi->rd_baseline_thresh[mode_index] >> 7)
3068               * cpi->rd_thresh_mult[mode_index];
3069 #endif
3070     }
3071
3072     /* keep record of best compound/single-only prediction */
3073     if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) {
3074       int single_rd, hybrid_rd, single_rate, hybrid_rate;
3075
3076       if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3077         single_rate = rate2 - compmode_cost;
3078         hybrid_rate = rate2;
3079       } else {
3080         single_rate = rate2;
3081         hybrid_rate = rate2 + compmode_cost;
3082       }
3083
3084       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3085       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3086
3087       if (mbmi->second_ref_frame <= INTRA_FRAME &&
3088           single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
3089         best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
3090       } else if (mbmi->second_ref_frame > INTRA_FRAME &&
3091                  single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
3092         best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
3093       }
3094       if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
3095         best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
3096     }
3097
3098     /* keep record of best txfm size */
3099     if (bsize < BLOCK_SIZE_SB32X32) {
3100       if (bsize < BLOCK_SIZE_MB16X16) {
3101         if (this_mode == SPLITMV || this_mode == I4X4_PRED)
3102           txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
3103         txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
3104       }
3105       txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
3106     }
3107     if (!mode_excluded && this_rd != INT64_MAX) {
3108       for (i = 0; i < NB_TXFM_MODES; i++) {
3109         int64_t adj_rd = INT64_MAX;
3110         if (this_mode != I4X4_PRED) {
3111           adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
3112         } else {
3113           adj_rd = this_rd;
3114         }
3115
3116         if (adj_rd < best_txfm_rd[i])
3117           best_txfm_rd[i] = adj_rd;
3118       }
3119     }
3120
3121     if (x->skip && !mode_excluded)
3122       break;
3123   }
3124   // Flag all modes that have a distortion that's > 2x the best we found at
3125   // this level.
3126   for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3127     if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3128       continue;
3129
3130     if (mode_distortions[mode_index] > 2 * *returndistortion) {
3131       ctx->modes_with_high_error |= (1 << mode_index);
3132     }
3133   }
3134
3135   // Flag all ref frames that have a distortion that's > 2x the best we found at
3136   // this level.
3137   for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3138     if (frame_distortions[ref_frame] > 2 * *returndistortion) {
3139       ctx->frames_with_high_error |= (1 << ref_frame);
3140     }
3141   }
3142
3143   if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
3144     *returnrate = INT_MAX;
3145     *returndistortion = INT_MAX;
3146     return best_rd;
3147   }
3148
3149   assert((cm->mcomp_filter_type == SWITCHABLE) ||
3150          (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
3151          (best_mbmode.ref_frame == INTRA_FRAME));
3152
3153   // Accumulate filter usage stats
3154   // TODO(agrange): Use RD criteria to select interpolation filter mode.
3155   if (is_inter_mode(best_mode))
3156     ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];
3157
3158   // TODO(rbultje) integrate with RD thresholding
3159 #if 0
3160   // Reduce the activation RD thresholds for the best choice mode
3161   if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
3162       (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
3163     int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
3164
3165     cpi->rd_thresh_mult[best_mode_index] =
3166       (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
3167       cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
3168     cpi->rd_threshes[best_mode_index] =
3169       (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
3170   }
3171 #endif
3172
3173   // This code forces Altref,0,0 and skip for the frame that overlays
3174   // an altref unless Altref is filtered. However, this is unsafe if
3175   // segment level coding of ref frame is enabled for this segment.
3176   if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
3177       cpi->is_src_frame_alt_ref &&
3178       (cpi->oxcf.arnr_max_frames == 0) &&
3179       (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)
3180       && bsize >= BLOCK_SIZE_SB8X8) {
3181     mbmi->mode = ZEROMV;
3182     mbmi->ref_frame = ALTREF_FRAME;
3183     mbmi->second_ref_frame = NONE;
3184     mbmi->mv[0].as_int = 0;
3185     mbmi->uv_mode = DC_PRED;
3186     mbmi->mb_skip_coeff = 1;
3187     if (cm->txfm_mode == TX_MODE_SELECT) {
3188       if (bsize >= BLOCK_SIZE_SB32X32)
3189         mbmi->txfm_size = TX_32X32;
3190       else if (bsize >= BLOCK_SIZE_MB16X16)
3191         mbmi->txfm_size = TX_16X16;
3192       else
3193         mbmi->txfm_size = TX_8X8;
3194     }
3195
3196     vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
3197     vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
3198     goto end;
3199   }
3200
3201   // macroblock modes
3202   *mbmi = best_mbmode;
3203   if (best_mbmode.ref_frame == INTRA_FRAME &&
3204       best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
3205     for (i = 0; i < 4; i++)
3206       xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
3207   }
3208
3209   if (best_mbmode.ref_frame != INTRA_FRAME &&
3210       best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
3211     for (i = 0; i < 4; i++)
3212       xd->mode_info_context->bmi[i].as_mv[0].as_int =
3213           best_bmodes[i].as_mv[0].as_int;
3214
3215     if (mbmi->second_ref_frame > 0)
3216       for (i = 0; i < 4; i++)
3217         xd->mode_info_context->bmi[i].as_mv[1].as_int =
3218             best_bmodes[i].as_mv[1].as_int;
3219
3220     *x->partition_info = best_partition;
3221
3222     mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
3223     mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
3224   }
3225
3226   for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
3227     if (best_pred_rd[i] == INT64_MAX)
3228       best_pred_diff[i] = INT_MIN;
3229     else
3230       best_pred_diff[i] = best_rd - best_pred_rd[i];
3231   }
3232
3233   if (!x->skip) {
3234     for (i = 0; i < NB_TXFM_MODES; i++) {
3235       if (best_txfm_rd[i] == INT64_MAX)
3236         best_txfm_diff[i] = 0;
3237       else
3238         best_txfm_diff[i] = best_rd - best_txfm_rd[i];
3239     }
3240   } else {
3241     vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
3242   }
3243
3244  end:
3245   set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
3246                     scale_factor);
3247   store_coding_context(x, ctx, best_mode_index,
3248                        &best_partition,
3249                        &mbmi->ref_mvs[mbmi->ref_frame][0],
3250                        &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
3251                                       mbmi->second_ref_frame][0],
3252                        best_pred_diff, best_txfm_diff);
3253
3254   return best_rd;
3255 }