granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encodeframe.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "./vpx_config.h"
  12 #include "./vp9_rtcd.h"
  13 #include "vp9/encoder/vp9_encodeframe.h"
  14 #include "vp9/encoder/vp9_encodemb.h"
  15 #include "vp9/encoder/vp9_encodemv.h"
  16 #include "vp9/common/vp9_common.h"
  17 #include "vp9/encoder/vp9_onyx_int.h"
  18 #include "vp9/common/vp9_extend.h"
  19 #include "vp9/common/vp9_entropy.h"
  20 #include "vp9/common/vp9_entropymode.h"
  21 #include "vp9/common/vp9_quant_common.h"
  22 #include "vp9/encoder/vp9_segmentation.h"
  23 #include "vp9/encoder/vp9_encodeintra.h"
  24 #include "vp9/common/vp9_reconinter.h"
  25 #include "vp9/encoder/vp9_rdopt.h"
  26 #include "vp9/common/vp9_findnearmv.h"
  27 #include "vp9/common/vp9_reconintra.h"
  28 #include "vp9/common/vp9_seg_common.h"
  29 #include "vp9/common/vp9_tile_common.h"
  30 #include "vp9/encoder/vp9_tokenize.h"
  31 #include "./vp9_rtcd.h"
  32 #include <stdio.h>
  33 #include <math.h>
  34 #include <limits.h>
  35 #include "vpx_ports/vpx_timer.h"
  36 #include "vp9/common/vp9_pred_common.h"
  37 #include "vp9/common/vp9_mvref_common.h"
  38
  39 #define DBG_PRNT_SEGMAP 0
  40
  41 // #define ENC_DEBUG
  42 #ifdef ENC_DEBUG
  43 int enc_debug = 0;
  44 #endif
  45
  46 void vp9_select_interp_filter_type(VP9_COMP *cpi);
  47
  48 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
  49                               int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize);
  50
  51 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
  52
  53 /* activity_avg must be positive, or flat regions could get a zero weight
  54  *  (infinite lambda), which confounds analysis.
  55  * This also avoids the need for divide by zero checks in
  56  *  vp9_activity_masking().
  57  */
  58 #define VP9_ACTIVITY_AVG_MIN (64)
  59
  60 /* This is used as a reference when computing the source variance for the
  61  *  purposes of activity masking.
  62  * Eventually this should be replaced by custom no-reference routines,
  63  *  which will be faster.
  64  */
  65 static const uint8_t VP9_VAR_OFFS[16] = {128, 128, 128, 128, 128, 128, 128, 128,
  66     128, 128, 128, 128, 128, 128, 128, 128};
  67
  68 // Original activity measure from Tim T's code.
  69 static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) {
  70   unsigned int act;
  71   unsigned int sse;
  72   /* TODO: This could also be done over smaller areas (8x8), but that would
  73    *  require extensive changes elsewhere, as lambda is assumed to be fixed
  74    *  over an entire MB in most of the code.
  75    * Another option is to compute four 8x8 variances, and pick a single
  76    *  lambda using a non-linear combination (e.g., the smallest, or second
  77    *  smallest, etc.).
  78    */
  79   act = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride,
  80                           VP9_VAR_OFFS, 0, &sse);
  81   act <<= 4;
  82
  83   /* If the region is flat, lower the activity some more. */
  84   if (act < 8 << 12)
  85     act = act < 5 << 12 ? act : 5 << 12;
  86
  87   return act;
  88 }
  89
  90 // Stub for alternative experimental activity measures.
  91 static unsigned int alt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x,
  92                                          int use_dc_pred) {
  93   return vp9_encode_intra(cpi, x, use_dc_pred);
  94 }
  95 DECLARE_ALIGNED(16, static const uint8_t, vp9_64x64_zeros[64*64]) = {0};
  96
  97 // Measure the activity of the current macroblock
  98 // What we measure here is TBD so abstracted to this function
  99 #define ALT_ACT_MEASURE 1
 100 static unsigned int mb_activity_measure(VP9_COMP *cpi, MACROBLOCK *x,
 101                                         int mb_row, int mb_col) {
 102   unsigned int mb_activity;
 103
 104   if (ALT_ACT_MEASURE) {
 105     int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
 106
 107     // Or use and alternative.
 108     mb_activity = alt_activity_measure(cpi, x, use_dc_pred);
 109   } else {
 110     // Original activity measure from Tim T's code.
 111     mb_activity = tt_activity_measure(cpi, x);
 112   }
 113
 114   if (mb_activity < VP9_ACTIVITY_AVG_MIN)
 115     mb_activity = VP9_ACTIVITY_AVG_MIN;
 116
 117   return mb_activity;
 118 }
 119
 120 // Calculate an "average" mb activity value for the frame
 121 #define ACT_MEDIAN 0
 122 static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) {
 123 #if ACT_MEDIAN
 124   // Find median: Simple n^2 algorithm for experimentation
 125   {
 126     unsigned int median;
 127     unsigned int i, j;
 128     unsigned int *sortlist;
 129     unsigned int tmp;
 130
 131     // Create a list to sort to
 132     CHECK_MEM_ERROR(&cpi->common, sortlist, vpx_calloc(sizeof(unsigned int),
 133                     cpi->common.MBs));
 134
 135     // Copy map to sort list
 136     vpx_memcpy(sortlist, cpi->mb_activity_map,
 137         sizeof(unsigned int) * cpi->common.MBs);
 138
 139     // Ripple each value down to its correct position
 140     for (i = 1; i < cpi->common.MBs; i ++) {
 141       for (j = i; j > 0; j --) {
 142         if (sortlist[j] < sortlist[j - 1]) {
 143           // Swap values
 144           tmp = sortlist[j - 1];
 145           sortlist[j - 1] = sortlist[j];
 146           sortlist[j] = tmp;
 147         } else
 148         break;
 149       }
 150     }
 151
 152     // Even number MBs so estimate median as mean of two either side.
 153     median = (1 + sortlist[cpi->common.MBs >> 1] +
 154         sortlist[(cpi->common.MBs >> 1) + 1]) >> 1;
 155
 156     cpi->activity_avg = median;
 157
 158     vpx_free(sortlist);
 159   }
 160 #else
 161   // Simple mean for now
 162   cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs);
 163 #endif
 164
 165   if (cpi->activity_avg < VP9_ACTIVITY_AVG_MIN)
 166     cpi->activity_avg = VP9_ACTIVITY_AVG_MIN;
 167
 168   // Experimental code: return fixed value normalized for several clips
 169   if (ALT_ACT_MEASURE)
 170     cpi->activity_avg = 100000;
 171 }
 172
 173 #define USE_ACT_INDEX   0
 174 #define OUTPUT_NORM_ACT_STATS   0
 175
 176 #if USE_ACT_INDEX
 177 // Calculate an activity index for each mb
 178 static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) {
 179   VP9_COMMON *const cm = &cpi->common;
 180   int mb_row, mb_col;
 181
 182   int64_t act;
 183   int64_t a;
 184   int64_t b;
 185
 186 #if OUTPUT_NORM_ACT_STATS
 187   FILE *f = fopen("norm_act.stt", "a");
 188   fprintf(f, "\n%12d\n", cpi->activity_avg);
 189 #endif
 190
 191   // Reset pointers to start of activity map
 192   x->mb_activity_ptr = cpi->mb_activity_map;
 193
 194   // Calculate normalized mb activity number.
 195   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
 196     // for each macroblock col in image
 197     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
 198       // Read activity from the map
 199       act = *(x->mb_activity_ptr);
 200
 201       // Calculate a normalized activity number
 202       a = act + 4 * cpi->activity_avg;
 203       b = 4 * act + cpi->activity_avg;
 204
 205       if (b >= a)
 206       *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1;
 207       else
 208       *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b);
 209
 210 #if OUTPUT_NORM_ACT_STATS
 211       fprintf(f, " %6d", *(x->mb_activity_ptr));
 212 #endif
 213       // Increment activity map pointers
 214       x->mb_activity_ptr++;
 215     }
 216
 217 #if OUTPUT_NORM_ACT_STATS
 218     fprintf(f, "\n");
 219 #endif
 220
 221   }
 222
 223 #if OUTPUT_NORM_ACT_STATS
 224   fclose(f);
 225 #endif
 226
 227 }
 228 #endif
 229
 230 // Loop through all MBs. Note activity of each, average activity and
 231 // calculate a normalized activity for each
 232 static void build_activity_map(VP9_COMP *cpi) {
 233   MACROBLOCK * const x = &cpi->mb;
 234   MACROBLOCKD *xd = &x->e_mbd;
 235   VP9_COMMON * const cm = &cpi->common;
 236
 237 #if ALT_ACT_MEASURE
 238   YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
 239   int recon_yoffset;
 240   int recon_y_stride = new_yv12->y_stride;
 241 #endif
 242
 243   int mb_row, mb_col;
 244   unsigned int mb_activity;
 245   int64_t activity_sum = 0;
 246
 247   x->mb_activity_ptr = cpi->mb_activity_map;
 248
 249   // for each macroblock row in image
 250   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
 251 #if ALT_ACT_MEASURE
 252     // reset above block coeffs
 253     xd->up_available = (mb_row != 0);
 254     recon_yoffset = (mb_row * recon_y_stride * 16);
 255 #endif
 256     // for each macroblock col in image
 257     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
 258 #if ALT_ACT_MEASURE
 259       xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
 260       xd->left_available = (mb_col != 0);
 261       recon_yoffset += 16;
 262 #endif
 263
 264       // measure activity
 265       mb_activity = mb_activity_measure(cpi, x, mb_row, mb_col);
 266
 267       // Keep frame sum
 268       activity_sum += mb_activity;
 269
 270       // Store MB level activity details.
 271       *x->mb_activity_ptr = mb_activity;
 272
 273       // Increment activity map pointer
 274       x->mb_activity_ptr++;
 275
 276       // adjust to the next column of source macroblocks
 277       x->plane[0].src.buf += 16;
 278     }
 279
 280     // adjust to the next row of mbs
 281     x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
 282   }
 283
 284   // Calculate an "average" MB activity
 285   calc_av_activity(cpi, activity_sum);
 286
 287 #if USE_ACT_INDEX
 288   // Calculate an activity index number of each mb
 289   calc_activity_index(cpi, x);
 290 #endif
 291
 292 }
 293
 294 // Macroblock activity masking
 295 void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
 296 #if USE_ACT_INDEX
 297   x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
 298   x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
 299   x->errorperbit += (x->errorperbit == 0);
 300 #else
 301   int64_t a;
 302   int64_t b;
 303   int64_t act = *(x->mb_activity_ptr);
 304
 305   // Apply the masking to the RD multiplier.
 306   a = act + (2 * cpi->activity_avg);
 307   b = (2 * act) + cpi->activity_avg;
 308
 309   x->rdmult = (unsigned int) (((int64_t) x->rdmult * b + (a >> 1)) / a);
 310   x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
 311   x->errorperbit += (x->errorperbit == 0);
 312 #endif
 313
 314   // Activity based Zbin adjustment
 315   adjust_act_zbin(cpi, x);
 316 }
 317
 318 static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
 319                          BLOCK_SIZE_TYPE bsize, int output_enabled) {
 320   int i, x_idx, y;
 321   MACROBLOCK * const x = &cpi->mb;
 322   MACROBLOCKD * const xd = &x->e_mbd;
 323   MODE_INFO *mi = &ctx->mic;
 324   MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
 325
 326   int mb_mode_index = ctx->best_mode_index;
 327   const int mis = cpi->common.mode_info_stride;
 328   const int bh = 1 << mi_height_log2(bsize), bw = 1 << mi_width_log2(bsize);
 329   const MB_PREDICTION_MODE mb_mode = mi->mbmi.mode;
 330
 331   assert(mb_mode < MB_MODE_COUNT);
 332   assert(mb_mode_index < MAX_MODES);
 333   assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
 334   assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES);
 335   assert(mi->mbmi.sb_type == bsize);
 336
 337   // Restore the coding context of the MB to that that was in place
 338   // when the mode was picked for it
 339   for (y = 0; y < bh; y++) {
 340     for (x_idx = 0; x_idx < bw; x_idx++) {
 341       if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > x_idx
 342           && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > y) {
 343         MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis;
 344         *mi_addr = *mi;
 345       }
 346     }
 347   }
 348   if (bsize < BLOCK_SIZE_SB32X32) {
 349     if (bsize < BLOCK_SIZE_MB16X16)
 350       ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8];
 351     ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
 352   }
 353
 354   if (mbmi->ref_frame[0] != INTRA_FRAME && mbmi->sb_type < BLOCK_SIZE_SB8X8) {
 355     *x->partition_info = ctx->partition_info;
 356     mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
 357     mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
 358   }
 359
 360   x->skip = ctx->skip;
 361   if (!output_enabled)
 362     return;
 363
 364   if (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
 365     for (i = 0; i < NB_TXFM_MODES; i++) {
 366       cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i];
 367     }
 368   }
 369
 370   if (cpi->common.frame_type == KEY_FRAME) {
 371     // Restore the coding modes to that held in the coding context
 372     // if (mb_mode == I4X4_PRED)
 373     //    for (i = 0; i < 16; i++)
 374     //    {
 375     //        xd->block[i].bmi.as_mode =
 376     //                          xd->mode_info_context->bmi[i].as_mode;
 377     //        assert(xd->mode_info_context->bmi[i].as_mode < MB_MODE_COUNT);
 378     //    }
 379 #if CONFIG_INTERNAL_STATS
 380     static const int kf_mode_index[] = {
 381       THR_DC /*DC_PRED*/,
 382       THR_V_PRED /*V_PRED*/,
 383       THR_H_PRED /*H_PRED*/,
 384       THR_D45_PRED /*D45_PRED*/,
 385       THR_D135_PRED /*D135_PRED*/,
 386       THR_D117_PRED /*D117_PRED*/,
 387       THR_D153_PRED /*D153_PRED*/,
 388       THR_D27_PRED /*D27_PRED*/,
 389       THR_D63_PRED /*D63_PRED*/,
 390       THR_TM /*TM_PRED*/,
 391       THR_B_PRED /*I4X4_PRED*/,
 392     };
 393     cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++;
 394 #endif
 395   } else {
 396     // Note how often each mode chosen as best
 397     cpi->mode_chosen_counts[mb_mode_index]++;
 398     if (mbmi->ref_frame[0] != INTRA_FRAME
 399         && (mbmi->sb_type < BLOCK_SIZE_SB8X8 || mbmi->mode == NEWMV)) {
 400       int_mv best_mv, best_second_mv;
 401       const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
 402       const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
 403       best_mv.as_int = ctx->best_ref_mv.as_int;
 404       best_second_mv.as_int = ctx->second_best_ref_mv.as_int;
 405       if (mbmi->mode == NEWMV) {
 406         best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int;
 407         best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int;
 408       }
 409       mbmi->best_mv.as_int = best_mv.as_int;
 410       mbmi->best_second_mv.as_int = best_second_mv.as_int;
 411       vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
 412     }
 413
 414     if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) {
 415       int i, j;
 416       for (j = 0; j < bh; ++j)
 417         for (i = 0; i < bw; ++i)
 418           if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > i
 419               && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > j)
 420             xd->mode_info_context[mis * j + i].mbmi = *mbmi;
 421     }
 422
 423     if (cpi->common.mcomp_filter_type == SWITCHABLE
 424         && is_inter_mode(mbmi->mode)) {
 425       ++cpi->common.fc.switchable_interp_count[vp9_get_pred_context(
 426           &cpi->common, xd, PRED_SWITCHABLE_INTERP)][vp9_switchable_interp_map[mbmi
 427           ->interp_filter]];
 428     }
 429
 430     cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
 431     cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
 432     cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
 433   }
 434 }
 435
 436 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
 437                           int mb_row, int mb_col) {
 438   uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, src
 439       ->alpha_buffer};
 440   int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, src
 441       ->alpha_stride};
 442   int i;
 443
 444   for (i = 0; i < MAX_MB_PLANE; i++) {
 445     setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mb_row, mb_col,
 446                      NULL, x->e_mbd.plane[i].subsampling_x,
 447                      x->e_mbd.plane[i].subsampling_y);
 448   }
 449 }
 450
 451 static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
 452                         BLOCK_SIZE_TYPE bsize) {
 453   MACROBLOCK * const x = &cpi->mb;
 454   VP9_COMMON * const cm = &cpi->common;
 455   MACROBLOCKD * const xd = &x->e_mbd;
 456   MB_MODE_INFO *mbmi;
 457   const int dst_fb_idx = cm->new_fb_idx;
 458   const int idx_str = xd->mode_info_stride * mi_row + mi_col;
 459   const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
 460   const int mb_row = mi_row >> 1;
 461   const int mb_col = mi_col >> 1;
 462   const int idx_map = mb_row * cm->mb_cols + mb_col;
 463   int i;
 464
 465   // entropy context structures
 466   for (i = 0; i < MAX_MB_PLANE; i++) {
 467     xd->plane[i].above_context = cm->above_context[i]
 468         + (mi_col * 2 >> xd->plane[i].subsampling_x);
 469     xd->plane[i].left_context = cm->left_context[i]
 470         + (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
 471   }
 472
 473   // partition contexts
 474   set_partition_seg_context(cm, xd, mi_row, mi_col);
 475
 476   // Activity map pointer
 477   x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
 478   x->active_ptr = cpi->active_map + idx_map;
 479
 480   /* pointers to mode info contexts */
 481   x->partition_info = x->pi + idx_str;
 482   xd->mode_info_context = cm->mi + idx_str;
 483   mbmi = &xd->mode_info_context->mbmi;
 484   // Special case: if prev_mi is NULL, the previous mode info context
 485   // cannot be used.
 486   xd->prev_mode_info_context = cm->prev_mi ? cm->prev_mi + idx_str : NULL;
 487
 488   // Set up destination pointers
 489   setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mi_row, mi_col);
 490
 491   /* Set up limit values for MV components to prevent them from
 492    * extending beyond the UMV borders assuming 16x16 block size */
 493   x->mv_row_min = -((mi_row * MI_SIZE)+ VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
 494   x->mv_col_min = -((mi_col * MI_SIZE)+ VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
 495   x->mv_row_max = ((cm->mi_rows - mi_row) * MI_SIZE
 496       + (VP9BORDERINPIXELS - MI_SIZE * bh - VP9_INTERP_EXTEND));
 497   x->mv_col_max = ((cm->mi_cols - mi_col) * MI_SIZE
 498       + (VP9BORDERINPIXELS - MI_SIZE * bw - VP9_INTERP_EXTEND));
 499
 500   // Set up distance of MB to edge of frame in 1/8th pel units
 501   assert(!(mi_col & (bw - 1)) && !(mi_row & (bh - 1)));
 502   set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
 503
 504   /* set up source buffers */
 505   vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
 506
 507   /* R/D setup */
 508   x->rddiv = cpi->RDDIV;
 509   x->rdmult = cpi->RDMULT;
 510
 511   /* segment ID */
 512   if (xd->segmentation_enabled) {
 513     uint8_t *map = xd->update_mb_segmentation_map ? cpi->segmentation_map
 514                                                   : cm->last_frame_seg_map;
 515     mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
 516
 517     vp9_mb_init_quantizer(cpi, x);
 518
 519     if (xd->segmentation_enabled && cpi->seg0_cnt > 0
 520         && !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME)
 521         && vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) {
 522       cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
 523     } else {
 524       const int y = mb_row & ~3;
 525       const int x = mb_col & ~3;
 526       const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
 527       const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
 528       const int tile_progress = cm->cur_tile_mi_col_start * cm->mb_rows >> 1;
 529       const int mb_cols = (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start)
 530           >> 1;
 531
 532       cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
 533           << 16) / cm->MBs;
 534     }
 535   } else {
 536     mbmi->segment_id = 0;
 537   }
 538 }
 539
 540 static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
 541                           TOKENEXTRA **tp, int *totalrate, int64_t *totaldist,
 542                           BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
 543   VP9_COMMON * const cm = &cpi->common;
 544   MACROBLOCK * const x = &cpi->mb;
 545   MACROBLOCKD * const xd = &x->e_mbd;
 546
 547   x->rd_search = 1;
 548
 549   if (bsize < BLOCK_SIZE_SB8X8)
 550     if (xd->ab_index != 0)
 551       return;
 552
 553   set_offsets(cpi, mi_row, mi_col, bsize);
 554   xd->mode_info_context->mbmi.sb_type = bsize;
 555   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
 556     vp9_activity_masking(cpi, x);
 557
 558   /* Find best coding mode & reconstruct the MB so it is available
 559    * as a predictor for MBs that follow in the SB */
 560   if (cm->frame_type == KEY_FRAME) {
 561     vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx);
 562   } else {
 563     vp9_rd_pick_inter_mode_sb(cpi, x, mi_row, mi_col, totalrate, totaldist,
 564                               bsize, ctx);
 565   }
 566 }
 567
 568 static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) {
 569   VP9_COMMON * const cm = &cpi->common;
 570   MACROBLOCK * const x = &cpi->mb;
 571   MACROBLOCKD * const xd = &x->e_mbd;
 572   MODE_INFO *mi = xd->mode_info_context;
 573   MB_MODE_INFO * const mbmi = &mi->mbmi;
 574
 575   if (cm->frame_type != KEY_FRAME) {
 576     int segment_id, seg_ref_active;
 577
 578     segment_id = mbmi->segment_id;
 579     seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
 580
 581     if (!seg_ref_active)
 582       cpi->intra_inter_count[vp9_get_pred_context(cm, xd, PRED_INTRA_INTER)][mbmi
 583           ->ref_frame[0] > INTRA_FRAME]++;
 584
 585     // If the segment reference feature is enabled we have only a single
 586     // reference frame allowed for the segment so exclude it from
 587     // the reference frame counts used to work out probabilities.
 588     if ((mbmi->ref_frame[0] > INTRA_FRAME) && !seg_ref_active) {
 589       if (cm->comp_pred_mode == HYBRID_PREDICTION)
 590         cpi->comp_inter_count[vp9_get_pred_context(cm, xd,
 591                                                    PRED_COMP_INTER_INTER)][mbmi
 592             ->ref_frame[1] > INTRA_FRAME]++;
 593
 594       if (mbmi->ref_frame[1] > INTRA_FRAME) {
 595         cpi->comp_ref_count[vp9_get_pred_context(cm, xd, PRED_COMP_REF_P)][mbmi
 596             ->ref_frame[0] == GOLDEN_FRAME]++;
 597       } else {
 598         cpi->single_ref_count[vp9_get_pred_context(cm, xd, PRED_SINGLE_REF_P1)][0][mbmi
 599             ->ref_frame[0] != LAST_FRAME]++;
 600         if (mbmi->ref_frame[0] != LAST_FRAME)
 601           cpi->single_ref_count[vp9_get_pred_context(cm, xd, PRED_SINGLE_REF_P2)][1][mbmi
 602               ->ref_frame[0] != GOLDEN_FRAME]++;
 603       }
 604     }
 605     // Count of last ref frame 0,0 usage
 606     if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame[0] == LAST_FRAME))
 607       cpi->inter_zz_count++;
 608   }
 609 }
 610
// TODO(jingning): the variables used here are a little complicated. Further
// refactoring is needed to organize the temporary buffers when recursive
// partitioning down to 4x4 block size is enabled.
 614 static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
 615                                             BLOCK_SIZE_TYPE bsize) {
 616   MACROBLOCKD * const xd = &x->e_mbd;
 617
 618   switch (bsize) {
 619     case BLOCK_SIZE_SB64X64:
 620       return &x->sb64_context;
 621     case BLOCK_SIZE_SB64X32:
 622       return &x->sb64x32_context[xd->sb_index];
 623     case BLOCK_SIZE_SB32X64:
 624       return &x->sb32x64_context[xd->sb_index];
 625     case BLOCK_SIZE_SB32X32:
 626       return &x->sb32_context[xd->sb_index];
 627     case BLOCK_SIZE_SB32X16:
 628       return &x->sb32x16_context[xd->sb_index][xd->mb_index];
 629     case BLOCK_SIZE_SB16X32:
 630       return &x->sb16x32_context[xd->sb_index][xd->mb_index];
 631     case BLOCK_SIZE_MB16X16:
 632       return &x->mb_context[xd->sb_index][xd->mb_index];
 633     case BLOCK_SIZE_SB16X8:
 634       return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
 635     case BLOCK_SIZE_SB8X16:
 636       return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
 637     case BLOCK_SIZE_SB8X8:
 638       return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
 639     case BLOCK_SIZE_SB8X4:
 640       return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
 641     case BLOCK_SIZE_SB4X8:
 642       return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
 643     case BLOCK_SIZE_AB4X4:
 644       return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
 645     default:
 646       assert(0);
 647       return NULL ;
 648   }
 649 }
 650
 651 static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x,
 652                                             BLOCK_SIZE_TYPE bsize) {
 653   MACROBLOCKD *xd = &x->e_mbd;
 654   switch (bsize) {
 655     case BLOCK_SIZE_SB64X64:
 656       return &x->sb64_partitioning;
 657     case BLOCK_SIZE_SB32X32:
 658       return &x->sb_partitioning[xd->sb_index];
 659     case BLOCK_SIZE_MB16X16:
 660       return &x->mb_partitioning[xd->sb_index][xd->mb_index];
 661     case BLOCK_SIZE_SB8X8:
 662       return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index];
 663     default:
 664       assert(0);
 665       return NULL ;
 666   }
 667 }
 668
 669 static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
 670                             ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
 671                             ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
 672                             PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
 673                             BLOCK_SIZE_TYPE bsize) {
 674   VP9_COMMON * const cm = &cpi->common;
 675   MACROBLOCK * const x = &cpi->mb;
 676   MACROBLOCKD * const xd = &x->e_mbd;
 677   int p;
 678   int bwl = b_width_log2(bsize), bw = 1 << bwl;
 679   int bhl = b_height_log2(bsize), bh = 1 << bhl;
 680   int mwl = mi_width_log2(bsize), mw = 1 << mwl;
 681   int mhl = mi_height_log2(bsize), mh = 1 << mhl;
 682   for (p = 0; p < MAX_MB_PLANE; p++) {
 683     vpx_memcpy(
 684         cm->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
 685         a + bw * p, sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
 686     vpx_memcpy(
 687         cm->left_context[p]
 688             + ((mi_row & MI_MASK)* 2 >> xd->plane[p].subsampling_y),l + bh * p,
 689             sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
 690           }
 691   vpx_memcpy(cm->above_seg_context + mi_col, sa,
 692              sizeof(PARTITION_CONTEXT) * mw);
 693   vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,
 694              sizeof(PARTITION_CONTEXT) * mh);
 695 }
 696 static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
 697                          ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
 698                          ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
 699                          PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
 700                          BLOCK_SIZE_TYPE bsize) {
 701   VP9_COMMON * const cm = &cpi->common;
 702   MACROBLOCK * const x = &cpi->mb;
 703   MACROBLOCKD * const xd = &x->e_mbd;
 704   int p;
 705   int bwl = b_width_log2(bsize), bw = 1 << bwl;
 706   int bhl = b_height_log2(bsize), bh = 1 << bhl;
 707   int mwl = mi_width_log2(bsize), mw = 1 << mwl;
 708   int mhl = mi_height_log2(bsize), mh = 1 << mhl;
 709
 710   // buffer the above/left context information of the block in search.
 711   for (p = 0; p < MAX_MB_PLANE; ++p) {
 712     vpx_memcpy(
 713         a + bw * p,
 714         cm->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
 715         sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
 716     vpx_memcpy(
 717         l + bh * p,
 718         cm->left_context[p]
 719             + ((mi_row & MI_MASK)* 2 >> xd->plane[p].subsampling_y),sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
 720           }
 721   vpx_memcpy(sa, cm->above_seg_context + mi_col,
 722              sizeof(PARTITION_CONTEXT) * mw);
 723   vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
 724   sizeof(PARTITION_CONTEXT) * mh)
 725              ;}
 726
 727 static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
 728                      int output_enabled, BLOCK_SIZE_TYPE bsize, int sub_index) {
 729   VP9_COMMON * const cm = &cpi->common;
 730   MACROBLOCK * const x = &cpi->mb;
 731   MACROBLOCKD * const xd = &x->e_mbd;
 732
 733   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
 734     return;
 735
 736   if (sub_index != -1)
 737     *(get_sb_index(xd, bsize)) = sub_index;
 738
 739   if (bsize < BLOCK_SIZE_SB8X8)
 740     if (xd->ab_index > 0)
 741       return;
 742   set_offsets(cpi, mi_row, mi_col, bsize);
 743   update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
 744   encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
 745
 746   if (output_enabled) {
 747     update_stats(cpi, mi_row, mi_col);
 748
 749     (*tp)->token = EOSB_TOKEN;
 750     (*tp)++;
 751   }
 752 }
 753
 754 static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
 755                       int output_enabled, BLOCK_SIZE_TYPE bsize) {
 756   VP9_COMMON * const cm = &cpi->common;
 757   MACROBLOCK * const x = &cpi->mb;
 758   MACROBLOCKD * const xd = &x->e_mbd;
 759   BLOCK_SIZE_TYPE c1 = BLOCK_SIZE_SB8X8;
 760   const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4;
 761   int bwl, bhl;
 762   int UNINITIALIZED_IS_SAFE(pl);
 763
 764   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
 765     return;
 766
 767   c1 = BLOCK_SIZE_AB4X4;
 768   if (bsize >= BLOCK_SIZE_SB8X8) {
 769     set_partition_seg_context(cm, xd, mi_row, mi_col);
 770     pl = partition_plane_context(xd, bsize);
 771     c1 = *(get_sb_partitioning(x, bsize));
 772   }
 773
 774   bwl = b_width_log2(c1), bhl = b_height_log2(c1);
 775
 776   if (bsl == bwl && bsl == bhl) {
 777     if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
 778       cpi->partition_count[pl][PARTITION_NONE]++;
 779     encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
 780   } else if (bsl == bhl && bsl > bwl) {
 781     if (output_enabled)
 782       cpi->partition_count[pl][PARTITION_VERT]++;
 783     encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
 784     encode_b(cpi, tp, mi_row, mi_col + bs, output_enabled, c1, 1);
 785   } else if (bsl == bwl && bsl > bhl) {
 786     if (output_enabled)
 787       cpi->partition_count[pl][PARTITION_HORZ]++;
 788     encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
 789     encode_b(cpi, tp, mi_row + bs, mi_col, output_enabled, c1, 1);
 790   } else {
 791     BLOCK_SIZE_TYPE subsize;
 792     int i;
 793
 794     assert(bwl < bsl && bhl < bsl);
 795     subsize = get_subsize(bsize, PARTITION_SPLIT);
 796
 797     if (output_enabled)
 798       cpi->partition_count[pl][PARTITION_SPLIT]++;
 799
 800     for (i = 0; i < 4; i++) {
 801       const int x_idx = i & 1, y_idx = i >> 1;
 802
 803       *(get_sb_index(xd, subsize)) = i;
 804       encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
 805                 output_enabled, subsize);
 806     }
 807   }
 808
 809   if (bsize >= BLOCK_SIZE_SB8X8
 810       && (bsize == BLOCK_SIZE_SB8X8 || bsl == bwl || bsl == bhl)) {
 811     set_partition_seg_context(cm, xd, mi_row, mi_col);
 812     update_partition_context(xd, c1, bsize);
 813   }
 814 }
 815
 816 static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m,
 817                              BLOCK_SIZE_TYPE bsize) {
 818   VP9_COMMON *const cm = &cpi->common;
 819   const int mis = cm->mode_info_stride;
 820   int block_row, block_col;
 821   for (block_row = 0; block_row < 8; ++block_row) {
 822     for (block_col = 0; block_col < 8; ++block_col) {
 823       m[block_row * mis + block_col].mbmi.sb_type = bsize;
 824     }
 825   }
 826 }
 827 static void copy_partitioning(VP9_COMP *cpi, MODE_INFO *m, MODE_INFO *p) {
 828   VP9_COMMON *const cm = &cpi->common;
 829   const int mis = cm->mode_info_stride;
 830   int block_row, block_col;
 831   for (block_row = 0; block_row < 8; ++block_row) {
 832     for (block_col = 0; block_col < 8; ++block_col) {
 833       m[block_row * mis + block_col].mbmi.sb_type =
 834           p[block_row * mis + block_col].mbmi.sb_type;
 835     }
 836   }
 837 }
 838
 839 static void set_block_size(VP9_COMMON * const cm, MODE_INFO *m,
 840                            BLOCK_SIZE_TYPE bsize, int mis, int mi_row,
 841                            int mi_col) {
 842   int row, col;
 843   int bwl = b_width_log2(bsize);
 844   int bhl = b_height_log2(bsize);
 845   int bsl = (bwl > bhl ? bwl : bhl);
 846
 847   int bs = (1 << bsl) / 2;  //
 848   MODE_INFO *m2 = m + mi_row * mis + mi_col;
 849   for (row = 0; row < bs; row++) {
 850     for (col = 0; col < bs; col++) {
 851       if (mi_row + row >= cm->mi_rows || mi_col + col >= cm->mi_cols)
 852         continue;
 853       m2[row * mis + col].mbmi.sb_type = bsize;
 854     }
 855   }
 856 }
 857
// Error statistics for one region plus the variance derived from them.
// All four fields are written together by fill_variance().
typedef struct {
  int64_t sum_square_error;  // sum of squared error terms
  int64_t sum_error;         // sum of error terms
  int count;                 // number of samples the sums cover
  int variance;              // 256 * (sse - sum^2 / count) / count, or 0
} var;
 864
// Variance statistics of one block for each non-split way of covering it:
// the whole block, its two horizontal halves and its two vertical halves.
// Filled in by fill_variance_tree().
typedef struct {
  var none;     // whole block
  var horz[2];  // [0] = split[0] + split[1], [1] = split[2] + split[3]
  var vert[2];  // [0] = split[0] + split[2], [1] = split[1] + split[3]
} partition_variance;
 870
// VT(TYPE, BLOCKSIZE) declares a struct type TYPE holding the
// partition_variance of a block (`vt`) together with its four children of
// type BLOCKSIZE (`split`).  The instantiations below nest these types so
// that a v64x64 contains four v32x32, each containing four v16x16, each
// containing four v8x8, whose own children are plain `var` leaves.
#define VT(TYPE, BLOCKSIZE) \
  typedef struct { \
    partition_variance vt; \
    BLOCKSIZE split[4]; } TYPE;

VT(v8x8, var)
VT(v16x16, v8x8)
VT(v32x32, v16x16)
VT(v64x64, v32x32)
 880
// Size-independent view of one level of the variance tree, produced by
// tree_to_node(): a pointer to the node's own partition_variance and
// pointers to the `var` summarising each of its four children.
typedef struct {
  partition_variance *vt;  // statistics of this node
  var *split[4];           // per-child summary statistics
} vt_node;
 885
// Names for levels of the variance tree.
// NOTE(review): not referenced in the nearby code; confirm it is still
// needed.
typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;
 891
 892 static void tree_to_node(void *data, BLOCK_SIZE_TYPE block_size, vt_node *node) {
 893   int i;
 894   switch (block_size) {
 895     case BLOCK_SIZE_SB64X64: {
 896       v64x64 *vt = (v64x64 *) data;
 897       node->vt = &vt->vt;
 898       for (i = 0; i < 4; i++)
 899         node->split[i] = &vt->split[i].vt.none;
 900       break;
 901     }
 902     case BLOCK_SIZE_SB32X32: {
 903       v32x32 *vt = (v32x32 *) data;
 904       node->vt = &vt->vt;
 905       for (i = 0; i < 4; i++)
 906         node->split[i] = &vt->split[i].vt.none;
 907       break;
 908     }
 909     case BLOCK_SIZE_MB16X16: {
 910       v16x16 *vt = (v16x16 *) data;
 911       node->vt = &vt->vt;
 912       for (i = 0; i < 4; i++)
 913         node->split[i] = &vt->split[i].vt.none;
 914       break;
 915     }
 916     case BLOCK_SIZE_SB8X8: {
 917       v8x8 *vt = (v8x8 *) data;
 918       node->vt = &vt->vt;
 919       for (i = 0; i < 4; i++)
 920         node->split[i] = &vt->split[i];
 921       break;
 922     }
 923     default:
 924       node->vt = 0;
 925       for (i = 0; i < 4; i++)
 926         node->split[i] = 0;
 927       assert(-1);
 928   }
 929 }
 930
 931 // Set variance values given sum square error, sum error, count.
 932 static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
 933   v->sum_square_error = s2;
 934   v->sum_error = s;
 935   v->count = c;
 936   if (c > 0)
 937     v->variance = 256
 938         * (v->sum_square_error - v->sum_error * v->sum_error / v->count)
 939         / v->count;
 940   else
 941     v->variance = 0;
 942 }
 943
 944 // Combine 2 variance structures by summing the sum_error, sum_square_error,
 945 // and counts and then calculating the new variance.
 946 void sum_2_variances(var *r, var *a, var*b) {
 947   fill_variance(r, a->sum_square_error + b->sum_square_error,
 948                 a->sum_error + b->sum_error, a->count + b->count);
 949 }
 950
 951 static void fill_variance_tree(void *data, BLOCK_SIZE_TYPE block_size) {
 952   vt_node node;
 953   tree_to_node(data, block_size, &node);
 954   sum_2_variances(&node.vt->horz[0], node.split[0], node.split[1]);
 955   sum_2_variances(&node.vt->horz[1], node.split[2], node.split[3]);
 956   sum_2_variances(&node.vt->vert[0], node.split[0], node.split[2]);
 957   sum_2_variances(&node.vt->vert[1], node.split[1], node.split[3]);
 958   sum_2_variances(&node.vt->none, &node.vt->vert[0], &node.vt->vert[1]);
 959 }
 960
 961 #if PERFORM_RANDOM_PARTITIONING
 962 static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m,
 963     BLOCK_SIZE_TYPE block_size, int mi_row,
 964     int mi_col, int mi_size) {
 965   VP9_COMMON * const cm = &cpi->common;
 966   vt_node vt;
 967   const int mis = cm->mode_info_stride;
 968   int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex;
 969
 970   tree_to_node(data, block_size, &vt);
 971
 972   // split none is available only if we have more than half a block size
 973   // in width and height inside the visible image
 974   if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows &&
 975       (rand() & 3) < 1) {
 976     set_block_size(cm, m, block_size, mis, mi_row, mi_col);
 977     return 1;
 978   }
 979
 980   // vertical split is available on all but the bottom border
 981   if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
 982       && (rand() & 3) < 1) {
 983     set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
 984         mi_col);
 985     return 1;
 986   }
 987
 988   // horizontal split is available on all but the right border
 989   if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
 990       && (rand() & 3) < 1) {
 991     set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
 992         mi_col);
 993     return 1;
 994   }
 995
 996   return 0;
 997 }
 998
 999 #else
1000
1001 static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m,
1002                                BLOCK_SIZE_TYPE block_size, int mi_row,
1003                                int mi_col, int mi_size) {
1004   VP9_COMMON * const cm = &cpi->common;
1005   vt_node vt;
1006   const int mis = cm->mode_info_stride;
1007   int64_t threshold = 50 * cpi->common.base_qindex;
1008
1009   tree_to_node(data, block_size, &vt);
1010
1011   // split none is available only if we have more than half a block size
1012   // in width and height inside the visible image
1013   if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows
1014       && vt.vt->none.variance < threshold) {
1015     set_block_size(cm, m, block_size, mis, mi_row, mi_col);
1016     return 1;
1017   }
1018
1019   // vertical split is available on all but the bottom border
1020   if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
1021       && vt.vt->vert[1].variance < threshold) {
1022     set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
1023                    mi_col);
1024     return 1;
1025   }
1026
1027   // horizontal split is available on all but the right border
1028   if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
1029       && vt.vt->horz[1].variance < threshold) {
1030     set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
1031                    mi_col);
1032     return 1;
1033   }
1034
1035   return 0;
1036 }
1037 #endif
1038
1039 static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
1040                                 int mi_col) {
1041   VP9_COMMON * const cm = &cpi->common;
1042   MACROBLOCK *x = &cpi->mb;
1043   MACROBLOCKD *xd = &cpi->mb.e_mbd;
1044   const int mis = cm->mode_info_stride;
1045   // TODO(JBB): More experimentation or testing of this threshold;
1046   int64_t threshold = 4;
1047   int i, j, k;
1048   v64x64 vt;
1049   unsigned char * s;
1050   int sp;
1051   const unsigned char * d;
1052   int dp;
1053   int pixels_wide = 64, pixels_high = 64;
1054
1055   vpx_memset(&vt, 0, sizeof(vt));
1056
1057   set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
1058
1059   if (xd->mb_to_right_edge < 0)
1060     pixels_wide += (xd->mb_to_right_edge >> 3);
1061
1062   if (xd->mb_to_bottom_edge < 0)
1063     pixels_high += (xd->mb_to_bottom_edge >> 3);
1064
1065   s = x->plane[0].src.buf;
1066   sp = x->plane[0].src.stride;
1067
1068   // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
1069   // but this needs more experimentation.
1070   threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;
1071
1072   d = vp9_64x64_zeros;
1073   dp = 64;
1074   if (cm->frame_type != KEY_FRAME) {
1075     int_mv nearest_mv, near_mv;
1076     YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[0];
1077     YV12_BUFFER_CONFIG *second_ref_fb = NULL;
1078
1079     setup_pre_planes(xd, ref_fb, second_ref_fb, mi_row, mi_col,
1080                      xd->scale_factor, xd->scale_factor_uv);
1081     xd->mode_info_context->mbmi.ref_frame[0] = LAST_FRAME;
1082     xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64;
1083     vp9_find_best_ref_mvs(xd, m->mbmi.ref_mvs[m->mbmi.ref_frame[0]],
1084                           &nearest_mv, &near_mv);
1085
1086     xd->mode_info_context->mbmi.mv[0] = nearest_mv;
1087     vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_SB64X64);
1088     d = xd->plane[0].dst.buf;
1089     dp = xd->plane[0].dst.stride;
1090
1091   }
1092
1093   // Fill in the entire tree of 8x8 variances for splits.
1094   for (i = 0; i < 4; i++) {
1095     const int x32_idx = ((i & 1) << 5);
1096     const int y32_idx = ((i >> 1) << 5);
1097     for (j = 0; j < 4; j++) {
1098       const int x16_idx = x32_idx + ((j & 1) << 4);
1099       const int y16_idx = y32_idx + ((j >> 1) << 4);
1100       v16x16 *vst = &vt.split[i].split[j];
1101       for (k = 0; k < 4; k++) {
1102         int x_idx = x16_idx + ((k & 1) << 3);
1103         int y_idx = y16_idx + ((k >> 1) << 3);
1104         unsigned int sse = 0;
1105         int sum = 0;
1106         if (x_idx < pixels_wide && y_idx < pixels_high)
1107           vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
1108                               d + y_idx * dp + x_idx, dp, &sse, &sum);
1109         fill_variance(&vst->split[k].vt.none, sse, sum, 64);
1110       }
1111     }
1112   }
1113   // Fill the rest of the variance tree by summing the split partition
1114   // values.
1115   for (i = 0; i < 4; i++) {
1116     for (j = 0; j < 4; j++) {
1117       fill_variance_tree(&vt.split[i].split[j], BLOCK_SIZE_MB16X16);
1118     }
1119     fill_variance_tree(&vt.split[i], BLOCK_SIZE_SB32X32);
1120   }
1121   fill_variance_tree(&vt, BLOCK_SIZE_SB64X64);
1122   // Now go through the entire structure,  splitting every block size until
1123   // we get to one that's got a variance lower than our threshold,  or we
1124   // hit 8x8.
1125   if (!set_vt_partitioning(cpi, &vt, m, BLOCK_SIZE_SB64X64, mi_row, mi_col,
1126                            4)) {
1127     for (i = 0; i < 4; ++i) {
1128       const int x32_idx = ((i & 1) << 2);
1129       const int y32_idx = ((i >> 1) << 2);
1130       if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_SIZE_SB32X32,
1131                                (mi_row + y32_idx), (mi_col + x32_idx), 2)) {
1132         for (j = 0; j < 4; ++j) {
1133           const int x16_idx = ((j & 1) << 1);
1134           const int y16_idx = ((j >> 1) << 1);
1135           if (!set_vt_partitioning(cpi, &vt.split[i].split[j], m,
1136                                    BLOCK_SIZE_MB16X16,
1137                                    (mi_row + y32_idx + y16_idx),
1138                                    (mi_col + x32_idx + x16_idx), 1)) {
1139             for (k = 0; k < 4; ++k) {
1140               const int x8_idx = (k & 1);
1141               const int y8_idx = (k >> 1);
1142               set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis,
1143                              (mi_row + y32_idx + y16_idx + y8_idx),
1144                              (mi_col + x32_idx + x16_idx + x8_idx));
1145             }
1146           }
1147         }
1148       }
1149     }
1150   }
1151 }
1152 static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
1153                              int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
1154                              int *rate, int64_t *dist) {
1155   VP9_COMMON * const cm = &cpi->common;
1156   MACROBLOCK * const x = &cpi->mb;
1157   MACROBLOCKD *xd = &cpi->mb.e_mbd;
1158   const int mis = cm->mode_info_stride;
1159   int bwl = b_width_log2(m->mbmi.sb_type);
1160   int bhl = b_height_log2(m->mbmi.sb_type);
1161   int bsl = b_width_log2(bsize);
1162   int bs = (1 << bsl);
1163   int bh = (1 << bhl);
1164   int ms = bs / 2;
1165   int mh = bh / 2;
1166   int bss = (1 << bsl) / 4;
1167   int i, pl;
1168   PARTITION_TYPE partition;
1169   BLOCK_SIZE_TYPE subsize;
1170   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1171   PARTITION_CONTEXT sl[8], sa[8];
1172   int last_part_rate = INT_MAX;
1173   int64_t last_part_dist = INT_MAX;
1174   int split_rate = INT_MAX;
1175   int64_t split_dist = INT_MAX;
1176   int none_rate = INT_MAX;
1177   int64_t none_dist = INT_MAX;
1178   int chosen_rate = INT_MAX;
1179   int64_t chosen_dist = INT_MAX;
1180   BLOCK_SIZE_TYPE sub_subsize = BLOCK_SIZE_AB4X4;
1181   int splits_below = 0;
1182   BLOCK_SIZE_TYPE bs_type = m->mbmi.sb_type;
1183
1184   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
1185     return;
1186
1187   // parse the partition type
1188   if ((bwl == bsl) && (bhl == bsl))
1189     partition = PARTITION_NONE;
1190   else if ((bwl == bsl) && (bhl < bsl))
1191     partition = PARTITION_HORZ;
1192   else if ((bwl < bsl) && (bhl == bsl))
1193     partition = PARTITION_VERT;
1194   else if ((bwl < bsl) && (bhl < bsl))
1195     partition = PARTITION_SPLIT;
1196   else
1197     assert(0);
1198
1199   subsize = get_subsize(bsize, partition);
1200
1201   if (bsize < BLOCK_SIZE_SB8X8) {
1202     if (xd->ab_index != 0) {
1203       *rate = 0;
1204       *dist = 0;
1205       return;
1206     }
1207   } else {
1208     *(get_sb_partitioning(x, bsize)) = subsize;
1209   }
1210   save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1211
1212   if (cpi->sf.adjust_partitioning_from_last_frame) {
1213     // Check if any of the sub blocks are further split.
1214     if (partition == PARTITION_SPLIT && subsize > BLOCK_SIZE_SB8X8) {
1215       sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
1216       splits_below = 1;
1217       for (i = 0; i < 4; i++) {
1218         int jj = i >> 1, ii = i & 0x01;
1219         if (m[jj * bss * mis + ii * bss].mbmi.sb_type >= sub_subsize)  {
1220           splits_below = 0;
1221         }
1222       }
1223     }
1224
1225     // If partition is not none try none unless each of the 4 splits are split
1226     // even further..
1227     if (partition != PARTITION_NONE && !splits_below &&
1228         mi_row + (ms >> 1) < cm->mi_rows &&
1229         mi_col + (ms >> 1) < cm->mi_cols) {
1230       *(get_sb_partitioning(x, bsize)) = bsize;
1231       pick_sb_modes(cpi, mi_row, mi_col, tp, &none_rate, &none_dist, bsize,
1232                     get_block_context(x, bsize));
1233
1234       set_partition_seg_context(cm, xd, mi_row, mi_col);
1235       pl = partition_plane_context(xd, bsize);
1236       none_rate += x->partition_cost[pl][PARTITION_NONE];
1237
1238       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1239       m->mbmi.sb_type = bs_type;
1240       *(get_sb_partitioning(x, bsize)) = subsize;
1241     }
1242   }
1243
1244   switch (partition) {
1245     case PARTITION_NONE:
1246       pick_sb_modes(cpi, mi_row, mi_col, tp, &last_part_rate, &last_part_dist,
1247                     bsize, get_block_context(x, bsize));
1248       set_partition_seg_context(cm, xd, mi_row, mi_col);
1249       pl = partition_plane_context(xd, bsize);
1250       last_part_rate += x->partition_cost[pl][PARTITION_NONE];
1251       break;
1252     case PARTITION_HORZ:
1253       *(get_sb_index(xd, subsize)) = 0;
1254       pick_sb_modes(cpi, mi_row, mi_col, tp, &last_part_rate, &last_part_dist,
1255                     subsize, get_block_context(x, subsize));
1256       if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (mh >> 1) < cm->mi_rows) {
1257         int rt = 0;
1258         int64_t dt = 0;
1259         update_state(cpi, get_block_context(x, subsize), subsize, 0);
1260         encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1261         *(get_sb_index(xd, subsize)) = 1;
1262         pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &rt, &dt, subsize,
1263                       get_block_context(x, subsize));
1264         last_part_rate += rt;
1265         last_part_dist += dt;
1266       }
1267       set_partition_seg_context(cm, xd, mi_row, mi_col);
1268       pl = partition_plane_context(xd, bsize);
1269       last_part_rate += x->partition_cost[pl][PARTITION_HORZ];
1270       break;
1271     case PARTITION_VERT:
1272       *(get_sb_index(xd, subsize)) = 0;
1273       pick_sb_modes(cpi, mi_row, mi_col, tp, &last_part_rate, &last_part_dist,
1274                     subsize, get_block_context(x, subsize));
1275       if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
1276         int rt = 0;
1277         int64_t dt = 0;
1278         update_state(cpi, get_block_context(x, subsize), subsize, 0);
1279         encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1280         *(get_sb_index(xd, subsize)) = 1;
1281         pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &rt, &dt, subsize,
1282                       get_block_context(x, subsize));
1283         last_part_rate += rt;
1284         last_part_dist += dt;
1285       }
1286       set_partition_seg_context(cm, xd, mi_row, mi_col);
1287       pl = partition_plane_context(xd, bsize);
1288       last_part_rate += x->partition_cost[pl][PARTITION_VERT];
1289       break;
1290     case PARTITION_SPLIT:
1291       // Split partition.
1292       last_part_rate = 0;
1293       last_part_dist = 0;
1294       for (i = 0; i < 4; i++) {
1295         int x_idx = (i & 1) * (ms >> 1);
1296         int y_idx = (i >> 1) * (ms >> 1);
1297         int jj = i >> 1, ii = i & 0x01;
1298         int rt;
1299         int64_t dt;
1300
1301         if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
1302           continue;
1303
1304         *(get_sb_index(xd, subsize)) = i;
1305
1306         rd_use_partition(cpi, m + jj * bss * mis + ii * bss, tp, mi_row + y_idx,
1307                          mi_col + x_idx, subsize, &rt, &dt);
1308         last_part_rate += rt;
1309         last_part_dist += dt;
1310       }
1311       set_partition_seg_context(cm, xd, mi_row, mi_col);
1312       pl = partition_plane_context(xd, bsize);
1313       last_part_rate += x->partition_cost[pl][PARTITION_SPLIT];
1314       break;
1315     default:
1316       assert(0);
1317   }
1318   if (cpi->sf.adjust_partitioning_from_last_frame
1319       && partition != PARTITION_SPLIT && bsize > BLOCK_SIZE_SB8X8
1320       && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
1321       && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
1322     BLOCK_SIZE_TYPE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
1323     split_rate = 0;
1324     split_dist = 0;
1325     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1326
1327     // Split partition.
1328     for (i = 0; i < 4; i++) {
1329       int x_idx = (i & 1) * (bs >> 2);
1330       int y_idx = (i >> 1) * (bs >> 2);
1331       int rt = 0;
1332       int64_t dt = 0;
1333       ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1334       PARTITION_CONTEXT sl[8], sa[8];
1335
1336       if ((mi_row + y_idx >= cm->mi_rows)
1337           || (mi_col + x_idx >= cm->mi_cols))
1338         continue;
1339
1340       *(get_sb_index(xd, split_subsize)) = i;
1341       *(get_sb_partitioning(x, bsize)) = split_subsize;
1342       *(get_sb_partitioning(x, split_subsize)) = split_subsize;
1343
1344       save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1345
1346       pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, tp, &rt, &dt,
1347                     split_subsize, get_block_context(x, split_subsize));
1348
1349       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1350
1351       if (rt < INT_MAX && dt < INT_MAX)
1352         encode_sb(cpi, tp,  mi_row + y_idx, mi_col + x_idx, 0,
1353                   split_subsize);
1354
1355       split_rate += rt;
1356       split_dist += dt;
1357       set_partition_seg_context(cm, xd, mi_row + y_idx, mi_col + x_idx);
1358       pl = partition_plane_context(xd, bsize);
1359       split_rate += x->partition_cost[pl][PARTITION_NONE];
1360     }
1361     set_partition_seg_context(cm, xd, mi_row, mi_col);
1362     pl = partition_plane_context(xd, bsize);
1363     split_rate += x->partition_cost[pl][PARTITION_SPLIT];
1364
1365     chosen_rate = split_rate;
1366     chosen_dist = split_dist;
1367   }
1368
1369   // If last_part is better set the partitioning to that...
1370   if (RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist)
1371       < RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)) {
1372     m->mbmi.sb_type = bsize;
1373     if (bsize >= BLOCK_SIZE_SB8X8)
1374       *(get_sb_partitioning(x, bsize)) = subsize;
1375     chosen_rate = last_part_rate;
1376     chosen_dist = last_part_dist;
1377   }
1378   // If none was better set the partitioning to that...
1379   if (RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)
1380       > RDCOST(x->rdmult, x->rddiv, none_rate, none_dist)) {
1381     if (bsize >= BLOCK_SIZE_SB8X8)
1382       *(get_sb_partitioning(x, bsize)) = bsize;
1383     chosen_rate = none_rate;
1384     chosen_dist = none_dist;
1385   }
1386
1387   restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1388
1389   // We must have chosen a partitioning and encoding or we'll fail later on.
1390   // No other opportunities for success.
1391   assert(chosen_rate < INT_MAX && chosen_dist < INT_MAX);
1392
1393   encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
1394   *rate = chosen_rate;
1395   *dist = chosen_dist;
1396 }
1397
1398
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previously rate-distortion optimization
// results, for encoding speed-up.
//
// Recursive rate-distortion search over the partition types of the square
// block `bsize` at (mi_row, mi_col).  PARTITION_SPLIT is costed first (by
// recursing into the four quadrants), then PARTITION_NONE, PARTITION_HORZ
// and PARTITION_VERT, each subject to the speed-feature switches in cpi->sf
// and to the frame boundaries.  The cheapest candidate by RDCOST is recorded
// through get_sb_partitioning() and its cost is returned in *rate / *dist
// (both stay INT_MAX if no candidate was evaluated).  If a candidate was
// found the block is passed to encode_sb(), with output enabled only at the
// 64x64 level.
static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
                              int mi_col, BLOCK_SIZE_TYPE bsize, int *rate,
                              int64_t *dist) {
  VP9_COMMON * const cm = &cpi->common;
  MACROBLOCK * const x = &cpi->mb;
  MACROBLOCKD * const xd = &x->e_mbd;
  int bsl = b_width_log2(bsize), bs = 1 << bsl;
  int ms = bs / 2;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  TOKENEXTRA *tp_orig = *tp;
  int i, pl;
  BLOCK_SIZE_TYPE subsize;
  // Best rate / distortion found so far; INT_MAX means "nothing yet".
  int srate = INT_MAX;
  int64_t sdist = INT_MAX;

  // Below 8x8 only the first sub-block (ab_index 0) is searched; the others
  // report zero cost.
  if (bsize < BLOCK_SIZE_SB8X8)
    if (xd->ab_index != 0) {
      *rate = 0;
      *dist = 0;
      return;
    }
  assert(mi_height_log2(bsize) == mi_width_log2(bsize));

  // Snapshot the contexts so each candidate starts from the same state.
  save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

  // PARTITION_SPLIT
  if (!cpi->sf.use_partitions_greater_than
      || (cpi->sf.use_partitions_greater_than
          && bsize > cpi->sf.greater_than_block_size)) {
    if (bsize >= BLOCK_SIZE_SB8X8) {
      int r4 = 0;
      int64_t d4 = 0;
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      *(get_sb_partitioning(x, bsize)) = subsize;

      for (i = 0; i < 4; ++i) {
        int x_idx = (i & 1) * (ms >> 1);
        int y_idx = (i >> 1) * (ms >> 1);
        int r = 0;
        int64_t d = 0;

        // Quadrants that start outside the frame contribute nothing.
        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        *(get_sb_index(xd, subsize)) = i;
        rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, &r,
                          &d);

        r4 += r;
        d4 += d;
      }
      set_partition_seg_context(cm, xd, mi_row, mi_col);
      pl = partition_plane_context(xd, bsize);
      if (r4 < INT_MAX)
        r4 += x->partition_cost[pl][PARTITION_SPLIT];
      assert(r4 >= 0);
      assert(d4 >= 0);
      srate = r4;
      sdist = d4;
      restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
    }
  }
  if (!cpi->sf.use_partitions_less_than
      || (cpi->sf.use_partitions_less_than
          && bsize <= cpi->sf.less_than_block_size)) {
    // Set when PARTITION_NONE beats the split result; consulted by the
    // less_rectangular_check speed feature below.
    int larger_is_better = 0;
    // PARTITION_NONE
    if ((mi_row + (ms >> 1) < cm->mi_rows) &&
        (mi_col + (ms >> 1) < cm->mi_cols)) {
      int r;
      int64_t d;
      pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
                    get_block_context(x, bsize));
      if (bsize >= BLOCK_SIZE_SB8X8) {
        set_partition_seg_context(cm, xd, mi_row, mi_col);
        pl = partition_plane_context(xd, bsize);
        r += x->partition_cost[pl][PARTITION_NONE];
      }

      if (RDCOST(x->rdmult, x->rddiv, r, d)
          < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
        srate = r;
        sdist = d;
        larger_is_better = 1;
        if (bsize >= BLOCK_SIZE_SB8X8)
          *(get_sb_partitioning(x, bsize)) = bsize;
      }
    }
    if (!cpi->sf.use_square_partition_only &&
        (!cpi->sf.less_rectangular_check ||!larger_is_better)) {
      // PARTITION_HORZ
      if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
        int r2, r = 0;
        int64_t d2, d = 0;
        subsize = get_subsize(bsize, PARTITION_HORZ);
        *(get_sb_index(xd, subsize)) = 0;
        pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
                      get_block_context(x, subsize));

        // The lower half is only searched when it starts inside the frame.
        if (mi_row + (ms >> 1) < cm->mi_rows) {
          update_state(cpi, get_block_context(x, subsize), subsize, 0);
          encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);

          *(get_sb_index(xd, subsize)) = 1;
          pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize,
                        get_block_context(x, subsize));
          r2 += r;
          d2 += d;
        }
        set_partition_seg_context(cm, xd, mi_row, mi_col);
        pl = partition_plane_context(xd, bsize);
        if (r2 < INT_MAX)
          r2 += x->partition_cost[pl][PARTITION_HORZ];
        if (RDCOST(x->rdmult, x->rddiv, r2, d2)
            < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
          srate = r2;
          sdist = d2;
          *(get_sb_partitioning(x, bsize)) = subsize;
        }
        restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
      }

      // PARTITION_VERT
      if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
        int r2;
        int64_t d2;
        subsize = get_subsize(bsize, PARTITION_VERT);
        *(get_sb_index(xd, subsize)) = 0;
        pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
                      get_block_context(x, subsize));
        // The right half is only searched when it starts inside the frame.
        if (mi_col + (ms >> 1) < cm->mi_cols) {
          int r = 0;
          int64_t d = 0;
          update_state(cpi, get_block_context(x, subsize), subsize, 0);
          encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);

          *(get_sb_index(xd, subsize)) = 1;
          pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize,
                        get_block_context(x, subsize));
          r2 += r;
          d2 += d;
        }
        set_partition_seg_context(cm, xd, mi_row, mi_col);
        pl = partition_plane_context(xd, bsize);
        if (r2 < INT_MAX)
          r2 += x->partition_cost[pl][PARTITION_VERT];
        if (RDCOST(x->rdmult, x->rddiv, r2, d2)
            < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
          srate = r2;
          sdist = d2;
          *(get_sb_partitioning(x, bsize)) = subsize;
        }
        restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
      }
    }
  }
  *rate = srate;
  *dist = sdist;

  restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

  // Encode with the winning partitioning; output is enabled only at 64x64.
  if (srate < INT_MAX && sdist < INT_MAX)
    encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);

  // Only the 64x64 level may advance the token pointer.
  if (bsize == BLOCK_SIZE_SB64X64) {
    assert(tp_orig < *tp);
    assert(srate < INT_MAX);
    assert(sdist < INT_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}
1575
1576 static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
1577                           int *totalrate) {
1578   VP9_COMMON * const cm = &cpi->common;
1579   int mi_col;
1580
1581   // Initialize the left context for the new SB row
1582   vpx_memset(&cm->left_context, 0, sizeof(cm->left_context));
1583   vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context));
1584
1585   // Code each SB in the row
1586   for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
1587       mi_col += 64 / MI_SIZE) {
1588     int dummy_rate;
1589     int64_t dummy_dist;
1590     if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
1591         cpi->sf.use_one_partition_size_always ) {
1592       const int idx_str = cm->mode_info_stride * mi_row + mi_col;
1593       MODE_INFO *m = cm->mi + idx_str;
1594       MODE_INFO *p = cm->prev_mi + idx_str;
1595
1596       if (cpi->sf.use_one_partition_size_always) {
1597         set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
1598         set_partitioning(cpi, m, cpi->sf.always_this_block_size);
1599         rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1600                          &dummy_rate, &dummy_dist);
1601       } else if (cpi->sf.partition_by_variance) {
1602         choose_partitioning(cpi, cm->mi, mi_row, mi_col);
1603         rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1604                          &dummy_rate, &dummy_dist);
1605       } else {
1606         if ((cpi->common.current_video_frame
1607             % cpi->sf.last_partitioning_redo_frequency) == 0
1608             || cm->prev_mi == 0
1609             || cpi->common.show_frame == 0
1610             || cpi->common.frame_type == KEY_FRAME
1611             || cpi->is_src_frame_alt_ref) {
1612           rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1613                             &dummy_rate, &dummy_dist);
1614         } else {
1615           copy_partitioning(cpi, m, p);
1616           rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1617                            &dummy_rate, &dummy_dist);
1618         }
1619       }
1620     } else {
1621       rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1622                         &dummy_rate, &dummy_dist);
1623     }
1624   }
1625 }
1626
1627 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
1628   MACROBLOCK * const x = &cpi->mb;
1629   VP9_COMMON * const cm = &cpi->common;
1630   MACROBLOCKD * const xd = &x->e_mbd;
1631
1632   x->act_zbin_adj = 0;
1633   cpi->seg0_idx = 0;
1634
1635   xd->mode_info_stride = cm->mode_info_stride;
1636   xd->frame_type = cm->frame_type;
1637
1638   xd->frames_since_golden = cm->frames_since_golden;
1639   xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
1640
1641   // reset intra mode contexts
1642   if (cm->frame_type == KEY_FRAME)
1643     vp9_init_mbmode_probs(cm);
1644
1645   // Copy data over into macro block data structures.
1646   vp9_setup_src_planes(x, cpi->Source, 0, 0);
1647
1648   // TODO(jkoleszar): are these initializations required?
1649   setup_pre_planes(xd, &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]], NULL,
1650                    0, 0, NULL, NULL );
1651   setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0);
1652
1653   setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
1654
1655   xd->mode_info_context->mbmi.mode = DC_PRED;
1656   xd->mode_info_context->mbmi.uv_mode = DC_PRED;
1657
1658   vp9_zero(cpi->y_mode_count)
1659   vp9_zero(cpi->y_uv_mode_count)
1660   vp9_zero(cm->fc.inter_mode_counts)
1661   vp9_zero(cpi->partition_count);
1662   vp9_zero(cpi->intra_inter_count);
1663   vp9_zero(cpi->comp_inter_count);
1664   vp9_zero(cpi->single_ref_count);
1665   vp9_zero(cpi->comp_ref_count);
1666   vp9_zero(cm->fc.tx_count_32x32p);
1667   vp9_zero(cm->fc.tx_count_16x16p);
1668   vp9_zero(cm->fc.tx_count_8x8p);
1669   vp9_zero(cm->fc.mbskip_count);
1670
1671   // Note: this memset assumes above_context[0], [1] and [2]
1672   // are allocated as part of the same buffer.
1673   vpx_memset(
1674       cm->above_context[0], 0,
1675       sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));
1676   vpx_memset(cm->above_seg_context, 0,
1677              sizeof(PARTITION_CONTEXT) * mi_cols_aligned_to_sb(cm));
1678 }
1679
1680 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
1681   if (lossless) {
1682     // printf("Switching to lossless\n");
1683     cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
1684     cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
1685     cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add;
1686     cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add;
1687     cpi->mb.optimize = 0;
1688     cpi->common.filter_level = 0;
1689     cpi->zbin_mode_boost_enabled = 0;
1690     cpi->common.txfm_mode = ONLY_4X4;
1691   } else {
1692     // printf("Not lossless\n");
1693     cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
1694     cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
1695     cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add;
1696     cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add;
1697   }
1698 }
1699
1700 static void switch_txfm_mode(VP9_COMP *cpi) {
1701   if (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
1702       cpi->common.txfm_mode >= ALLOW_32X32)
1703     cpi->common.txfm_mode = ALLOW_32X32;
1704 }
1705
1706 static void encode_frame_internal(VP9_COMP *cpi) {
1707   int mi_row;
1708   MACROBLOCK * const x = &cpi->mb;
1709   VP9_COMMON * const cm = &cpi->common;
1710   MACROBLOCKD * const xd = &x->e_mbd;
1711   int totalrate;
1712
1713 //  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
1714 //           cpi->common.current_video_frame, cpi->common.show_frame,
1715 //           cm->frame_type);
1716
1717 // debug output
1718 #if DBG_PRNT_SEGMAP
1719   {
1720     FILE *statsfile;
1721     statsfile = fopen("segmap2.stt", "a");
1722     fprintf(statsfile, "\n");
1723     fclose(statsfile);
1724   }
1725 #endif
1726
1727   totalrate = 0;
1728
1729   // Reset frame count of inter 0,0 motion vector usage.
1730   cpi->inter_zz_count = 0;
1731
1732   vp9_zero(cm->fc.switchable_interp_count);
1733   vp9_zero(cpi->best_switchable_interp_count);
1734   vp9_zero(cpi->txfm_stepdown_count);
1735
1736   xd->mode_info_context = cm->mi;
1737   xd->prev_mode_info_context = cm->prev_mi;
1738
1739   vp9_zero(cpi->NMVcount);
1740   vp9_zero(cpi->coef_counts);
1741   vp9_zero(cm->fc.eob_branch_counts);
1742
1743   cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
1744       && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
1745   switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
1746
1747   vp9_frame_init_quantizer(cpi);
1748
1749   vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y_dc_delta_q);
1750   vp9_initialize_me_consts(cpi, cm->base_qindex);
1751   switch_txfm_mode(cpi);
1752
1753   if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
1754     // Initialize encode frame context.
1755     init_encode_frame_mb_context(cpi);
1756
1757     // Build a frame level activity map
1758     build_activity_map(cpi);
1759   }
1760
1761   // re-initencode frame context.
1762   init_encode_frame_mb_context(cpi);
1763
1764   vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));
1765   vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
1766   vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes));
1767
1768   set_prev_mi(cm);
1769
1770   {
1771     struct vpx_usec_timer emr_timer;
1772     vpx_usec_timer_start(&emr_timer);
1773
1774     {
1775       // Take tiles into account and give start/end MB
1776       int tile_col, tile_row;
1777       TOKENEXTRA *tp = cpi->tok;
1778
1779       for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
1780         vp9_get_tile_row_offsets(cm, tile_row);
1781
1782         for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
1783           TOKENEXTRA *tp_old = tp;
1784
1785           // For each row of SBs in the frame
1786           vp9_get_tile_col_offsets(cm, tile_col);
1787           for (mi_row = cm->cur_tile_mi_row_start;
1788               mi_row < cm->cur_tile_mi_row_end; mi_row += 8)
1789             encode_sb_row(cpi, mi_row, &tp, &totalrate);
1790
1791           cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
1792           assert(tp - cpi->tok <=
1793                  get_token_alloc(cm->mb_rows, cm->mb_cols));
1794         }
1795       }
1796     }
1797
1798     vpx_usec_timer_mark(&emr_timer);
1799     cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
1800   }
1801
1802   // 256 rate units to the bit,
1803   // projected_frame_size in units of BYTES
1804   cpi->projected_frame_size = totalrate >> 8;
1805
1806 #if 0
1807   // Keep record of the total distortion this time around for future use
1808   cpi->last_frame_distortion = cpi->frame_distortion;
1809 #endif
1810
1811 }
1812
1813 static int check_dual_ref_flags(VP9_COMP *cpi) {
1814   MACROBLOCKD *xd = &cpi->mb.e_mbd;
1815   int ref_flags = cpi->ref_frame_flags;
1816
1817   if (vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) {
1818     return 0;
1819   } else {
1820     return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
1821         + !!(ref_flags & VP9_ALT_FLAG)) >= 2;
1822   }
1823 }
1824
1825 static int get_skip_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs) {
1826   int x, y;
1827
1828   for (y = 0; y < ymbs; y++) {
1829     for (x = 0; x < xmbs; x++) {
1830       if (!mi[y * mis + x].mbmi.mb_skip_coeff)
1831         return 0;
1832     }
1833   }
1834
1835   return 1;
1836 }
1837
1838 static void set_txfm_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs,
1839                           TX_SIZE txfm_size) {
1840   int x, y;
1841
1842   for (y = 0; y < ymbs; y++) {
1843     for (x = 0; x < xmbs; x++)
1844       mi[y * mis + x].mbmi.txfm_size = txfm_size;
1845   }
1846 }
1847
1848 static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO *mi, int mis,
1849                                    TX_SIZE txfm_max, int bw, int bh, int mi_row,
1850                                    int mi_col, BLOCK_SIZE_TYPE bsize) {
1851   VP9_COMMON * const cm = &cpi->common;
1852   MB_MODE_INFO * const mbmi = &mi->mbmi;
1853
1854   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
1855     return;
1856
1857   if (mbmi->txfm_size > txfm_max) {
1858     MACROBLOCK * const x = &cpi->mb;
1859     MACROBLOCKD * const xd = &x->e_mbd;
1860     const int segment_id = mbmi->segment_id;
1861     const int ymbs = MIN(bh, cm->mi_rows - mi_row);
1862     const int xmbs = MIN(bw, cm->mi_cols - mi_col);
1863
1864     xd->mode_info_context = mi;
1865     assert(
1866         vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) || get_skip_flag(mi, mis, ymbs, xmbs));
1867     set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
1868   }
1869 }
1870
1871 static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
1872                                     TX_SIZE txfm_max, int mi_row, int mi_col,
1873                                     BLOCK_SIZE_TYPE bsize) {
1874   VP9_COMMON * const cm = &cpi->common;
1875   const int mis = cm->mode_info_stride;
1876   int bwl, bhl;
1877   const int bsl = mi_width_log2(bsize), bs = 1 << (bsl - 1);
1878
1879   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
1880     return;
1881
1882   bwl = mi_width_log2(mi->mbmi.sb_type);
1883   bhl = mi_height_log2(mi->mbmi.sb_type);
1884
1885   if (bwl == bsl && bhl == bsl) {
1886     reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, 1 << bsl, 1 << bsl, mi_row,
1887                            mi_col, bsize);
1888   } else if (bwl == bsl && bhl < bsl) {
1889     reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, 1 << bsl, bs, mi_row, mi_col,
1890                            bsize);
1891     reset_skip_txfm_size_b(cpi, mi + bs * mis, mis, txfm_max, 1 << bsl, bs,
1892                            mi_row + bs, mi_col, bsize);
1893   } else if (bwl < bsl && bhl == bsl) {
1894     reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, bs, 1 << bsl, mi_row, mi_col,
1895                            bsize);
1896     reset_skip_txfm_size_b(cpi, mi + bs, mis, txfm_max, bs, 1 << bsl, mi_row,
1897                            mi_col + bs, bsize);
1898   } else {
1899     BLOCK_SIZE_TYPE subsize;
1900     int n;
1901
1902     assert(bwl < bsl && bhl < bsl);
1903     if (bsize == BLOCK_SIZE_SB64X64) {
1904       subsize = BLOCK_SIZE_SB32X32;
1905     } else if (bsize == BLOCK_SIZE_SB32X32) {
1906       subsize = BLOCK_SIZE_MB16X16;
1907     } else {
1908       assert(bsize == BLOCK_SIZE_MB16X16);
1909       subsize = BLOCK_SIZE_SB8X8;
1910     }
1911
1912     for (n = 0; n < 4; n++) {
1913       const int y_idx = n >> 1, x_idx = n & 0x01;
1914
1915       reset_skip_txfm_size_sb(cpi, mi + y_idx * bs * mis + x_idx * bs, txfm_max,
1916                               mi_row + y_idx * bs, mi_col + x_idx * bs,
1917                               subsize);
1918     }
1919   }
1920 }
1921
1922 static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
1923   VP9_COMMON * const cm = &cpi->common;
1924   int mi_row, mi_col;
1925   const int mis = cm->mode_info_stride;
1926   MODE_INFO *mi, *mi_ptr = cm->mi;
1927
1928   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) {
1929     mi = mi_ptr;
1930     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi += 8) {
1931       reset_skip_txfm_size_sb(cpi, mi, txfm_max, mi_row, mi_col,
1932                               BLOCK_SIZE_SB64X64);
1933     }
1934   }
1935 }
1936
1937 static int get_frame_type(VP9_COMP *cpi) {
1938   int frame_type;
1939   if (cpi->common.frame_type == KEY_FRAME)
1940     frame_type = 0;
1941   else if (cpi->is_src_frame_alt_ref && cpi->refresh_golden_frame)
1942     frame_type = 3;
1943   else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
1944     frame_type = 1;
1945   else
1946     frame_type = 2;
1947   return frame_type;
1948 }
1949
1950 static void select_txfm_mode(VP9_COMP *cpi) {
1951   if (cpi->oxcf.lossless) {
1952     cpi->common.txfm_mode = ONLY_4X4;
1953   } else if (cpi->common.current_video_frame == 0) {
1954     cpi->common.txfm_mode = TX_MODE_SELECT;
1955   } else {
1956     if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
1957       int frame_type = get_frame_type(cpi);
1958       cpi->common.txfm_mode =
1959           cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32]
1960           > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
1961           ALLOW_32X32 : TX_MODE_SELECT;
1962     } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
1963       cpi->common.txfm_mode = ALLOW_32X32;
1964     } else {
1965       unsigned int total = 0;
1966       int i;
1967       for (i = 0; i < TX_SIZE_MAX_SB; ++i)
1968         total += cpi->txfm_stepdown_count[i];
1969       if (total) {
1970         double fraction = (double)cpi->txfm_stepdown_count[0] / total;
1971         cpi->common.txfm_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
1972         // printf("fraction = %f\n", fraction);
1973       }  // else keep unchanged
1974     }
1975   }
1976 }
1977
1978 void vp9_encode_frame(VP9_COMP *cpi) {
1979   VP9_COMMON * const cm = &cpi->common;
1980
1981   // In the longer term the encoder should be generalized to match the
1982   // decoder such that we allow compound where one of the 3 buffers has a
1983   // differnt sign bias and that buffer is then the fixed ref. However, this
1984   // requires further work in the rd loop. For now the only supported encoder
1985   // side behaviour is where the ALT ref buffer has oppositie sign bias to
1986   // the other two.
1987   if ((cm->ref_frame_sign_bias[ALTREF_FRAME]
1988        == cm->ref_frame_sign_bias[GOLDEN_FRAME])
1989       || (cm->ref_frame_sign_bias[ALTREF_FRAME]
1990           == cm->ref_frame_sign_bias[LAST_FRAME])) {
1991     cm->allow_comp_inter_inter = 0;
1992   } else {
1993     cm->allow_comp_inter_inter = 1;
1994     cm->comp_fixed_ref = ALTREF_FRAME;
1995     cm->comp_var_ref[0] = LAST_FRAME;
1996     cm->comp_var_ref[1] = GOLDEN_FRAME;
1997   }
1998
1999   if (cpi->sf.RD) {
2000     int i, pred_type;
2001     /*
2002      * This code does a single RD pass over the whole frame assuming
2003      * either compound, single or hybrid prediction as per whatever has
2004      * worked best for that type of frame in the past.
2005      * It also predicts whether another coding mode would have worked
2006      * better that this coding mode. If that is the case, it remembers
2007      * that for subsequent frames.
2008      * It does the same analysis for transform size selection also.
2009      */
2010     int frame_type = get_frame_type(cpi);
2011
2012     /* prediction (compound, single or hybrid) mode selection */
2013     if (frame_type == 3 || !cm->allow_comp_inter_inter)
2014       pred_type = SINGLE_PREDICTION_ONLY;
2015     else if (cpi->rd_prediction_type_threshes[frame_type][1]
2016              > cpi->rd_prediction_type_threshes[frame_type][0]
2017              && cpi->rd_prediction_type_threshes[frame_type][1]
2018              > cpi->rd_prediction_type_threshes[frame_type][2]
2019              && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
2020       pred_type = COMP_PREDICTION_ONLY;
2021     else if (cpi->rd_prediction_type_threshes[frame_type][0]
2022              > cpi->rd_prediction_type_threshes[frame_type][2])
2023       pred_type = SINGLE_PREDICTION_ONLY;
2024     else
2025       pred_type = HYBRID_PREDICTION;
2026
2027     /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
2028
2029     cpi->mb.e_mbd.lossless = 0;
2030     if (cpi->oxcf.lossless) {
2031       cpi->mb.e_mbd.lossless = 1;
2032     }
2033
2034     select_txfm_mode(cpi);
2035     cpi->common.comp_pred_mode = pred_type;
2036     encode_frame_internal(cpi);
2037
2038     for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
2039       const int diff = (int) (cpi->rd_comp_pred_diff[i] / cpi->common.MBs);
2040       cpi->rd_prediction_type_threshes[frame_type][i] += diff;
2041       cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
2042     }
2043
2044     for (i = 0; i < NB_TXFM_MODES; ++i) {
2045       int64_t pd = cpi->rd_tx_select_diff[i];
2046       int diff;
2047       if (i == TX_MODE_SELECT)
2048         pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv,
2049                      2048 * (TX_SIZE_MAX_SB - 1), 0);
2050       diff = (int) (pd / cpi->common.MBs);
2051       cpi->rd_tx_select_threshes[frame_type][i] += diff;
2052       cpi->rd_tx_select_threshes[frame_type][i] /= 2;
2053     }
2054
2055     if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
2056       int single_count_zero = 0;
2057       int comp_count_zero = 0;
2058
2059       for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
2060         single_count_zero += cpi->comp_inter_count[i][0];
2061         comp_count_zero += cpi->comp_inter_count[i][1];
2062       }
2063
2064       if (comp_count_zero == 0) {
2065         cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY;
2066         vp9_zero(cpi->comp_inter_count);
2067       } else if (single_count_zero == 0) {
2068         cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY;
2069         vp9_zero(cpi->comp_inter_count);
2070       }
2071     }
2072
2073     if (cpi->common.txfm_mode == TX_MODE_SELECT) {
2074       int count4x4 = 0;
2075       int count8x8_lp = 0, count8x8_8x8p = 0;
2076       int count16x16_16x16p = 0, count16x16_lp = 0;
2077       int count32x32 = 0;
2078
2079       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2080         count4x4 += cm->fc.tx_count_32x32p[i][TX_4X4];
2081       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2082         count4x4 += cm->fc.tx_count_16x16p[i][TX_4X4];
2083       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2084         count4x4 += cm->fc.tx_count_8x8p[i][TX_4X4];
2085
2086       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2087         count8x8_lp += cm->fc.tx_count_32x32p[i][TX_8X8];
2088       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2089         count8x8_lp += cm->fc.tx_count_16x16p[i][TX_8X8];
2090
2091       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2092         count8x8_8x8p += cm->fc.tx_count_8x8p[i][TX_8X8];
2093
2094       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2095         count16x16_16x16p += cm->fc.tx_count_16x16p[i][TX_16X16];
2096
2097       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2098         count16x16_lp += cm->fc.tx_count_32x32p[i][TX_16X16];
2099
2100       for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2101         count32x32 += cm->fc.tx_count_32x32p[i][TX_32X32];
2102
2103       if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0
2104           && count32x32 == 0) {
2105         cpi->common.txfm_mode = ALLOW_8X8;
2106         reset_skip_txfm_size(cpi, TX_8X8);
2107       } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0
2108                  && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
2109         cpi->common.txfm_mode = ONLY_4X4;
2110         reset_skip_txfm_size(cpi, TX_4X4);
2111       } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
2112         cpi->common.txfm_mode = ALLOW_32X32;
2113       } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
2114         cpi->common.txfm_mode = ALLOW_16X16;
2115         reset_skip_txfm_size(cpi, TX_16X16);
2116       }
2117     }
2118
2119     // Update interpolation filter strategy for next frame.
2120     if ((cpi->common.frame_type != KEY_FRAME) && (cpi->sf.search_best_filter))
2121       vp9_select_interp_filter_type(cpi);
2122   } else {
2123     encode_frame_internal(cpi);
2124   }
2125
2126 }
2127
2128 static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
2129   const MACROBLOCKD *xd = &x->e_mbd;
2130   const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
2131   const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
2132
2133   ++cpi->y_uv_mode_count[m][uvm];
2134   if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
2135     const BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
2136     const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
2137     const int bsl = MIN(bwl, bhl);
2138     ++cpi->y_mode_count[MIN(bsl, 3)][m];
2139   } else {
2140     int idx, idy;
2141     int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type);
2142     int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type);
2143     for (idy = 0; idy < 2; idy += bh) {
2144       for (idx = 0; idx < 2; idx += bw) {
2145         int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode;
2146         ++cpi->y_mode_count[0][m];
2147       }
2148     }
2149   }
2150 }
2151
2152 // Experimental stub function to create a per MB zbin adjustment based on
2153 // some previously calculated measure of MB activity.
2154 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
2155 #if USE_ACT_INDEX
2156   x->act_zbin_adj = *(x->mb_activity_ptr);
2157 #else
2158   int64_t a;
2159   int64_t b;
2160   int64_t act = *(x->mb_activity_ptr);
2161
2162   // Apply the masking to the RD multiplier.
2163   a = act + 4 * cpi->activity_avg;
2164   b = 4 * act + cpi->activity_avg;
2165
2166   if (act > cpi->activity_avg)
2167     x->act_zbin_adj = (int) (((int64_t) b + (a >> 1)) / a) - 1;
2168   else
2169     x->act_zbin_adj = 1 - (int) (((int64_t) a + (b >> 1)) / b);
2170 #endif
2171 }
2172
2173 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
2174                               int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) {
2175   VP9_COMMON * const cm = &cpi->common;
2176   MACROBLOCK * const x = &cpi->mb;
2177   MACROBLOCKD * const xd = &x->e_mbd;
2178   MODE_INFO *mi = xd->mode_info_context;
2179   MB_MODE_INFO *mbmi = &mi->mbmi;
2180   unsigned int segment_id = mbmi->segment_id;
2181   const int mis = cm->mode_info_stride;
2182   const int bwl = mi_width_log2(bsize);
2183   const int bw = 1 << bwl, bh = 1 << mi_height_log2(bsize);
2184   x->rd_search = 0;
2185
2186   if (cm->frame_type == KEY_FRAME) {
2187     if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
2188       adjust_act_zbin(cpi, x);
2189       vp9_update_zbin_extra(cpi, x);
2190     }
2191   } else {
2192     vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2193
2194     if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
2195       // Adjust the zbin based on this MB rate.
2196       adjust_act_zbin(cpi, x);
2197     }
2198
2199     // Experimental code. Special case for gf and arf zeromv modes.
2200     // Increase zbin size to suppress noise
2201     cpi->zbin_mode_boost = 0;
2202     if (cpi->zbin_mode_boost_enabled) {
2203       if (mbmi->ref_frame[0] != INTRA_FRAME) {
2204         if (mbmi->mode == ZEROMV) {
2205           if (mbmi->ref_frame[0] != LAST_FRAME)
2206             cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
2207           else
2208             cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
2209         } else if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
2210           cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
2211         } else {
2212           cpi->zbin_mode_boost = MV_ZBIN_BOOST;
2213         }
2214       } else {
2215         cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
2216       }
2217     }
2218
2219     vp9_update_zbin_extra(cpi, x);
2220   }
2221
2222   if (mbmi->ref_frame[0] == INTRA_FRAME) {
2223     vp9_encode_intra_block_y(
2224         cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2225     vp9_encode_intra_block_uv(
2226         cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2227     if (output_enabled)
2228       sum_intra_stats(cpi, x);
2229   } else {
2230     int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])];
2231     YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
2232     YV12_BUFFER_CONFIG *second_ref_fb = NULL;
2233     if (mbmi->ref_frame[1] > 0) {
2234       idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])];
2235       second_ref_fb = &cm->yv12_fb[idx];
2236     }
2237
2238     assert(cm->frame_type != KEY_FRAME);
2239
2240     setup_pre_planes(xd, ref_fb, second_ref_fb, mi_row, mi_col,
2241                      xd->scale_factor, xd->scale_factor_uv);
2242
2243     vp9_build_inter_predictors_sb(
2244         xd, mi_row, mi_col,
2245         bsize < BLOCK_SIZE_SB8X8 ? BLOCK_SIZE_SB8X8 : bsize);
2246   }
2247
2248   if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
2249     vp9_tokenize_sb(cpi, xd, t, !output_enabled,
2250                     (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2251   } else if (!x->skip) {
2252     vp9_encode_sb(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2253     vp9_tokenize_sb(cpi, xd, t, !output_enabled,
2254                     (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2255   } else {
2256     int mb_skip_context = xd->left_available ? (mi - 1)->mbmi.mb_skip_coeff : 0;
2257     mb_skip_context += (mi - mis)->mbmi.mb_skip_coeff;
2258
2259     mbmi->mb_skip_coeff = 1;
2260     if (output_enabled)
2261       cm->fc.mbskip_count[mb_skip_context][1]++;
2262     vp9_reset_sb_tokens_context(
2263         xd, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2264   }
2265
2266   // copy skip flag on all mb_mode_info contexts in this SB
2267   // if this was a skip at this txfm size
2268   vp9_set_pred_flag(xd, bsize, PRED_MBSKIP, mi->mbmi.mb_skip_coeff);
2269
2270   if (output_enabled) {
2271     if (cm->txfm_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_SIZE_SB8X8
2272         && !(mbmi->ref_frame[0] != INTRA_FRAME
2273             && (mbmi->mb_skip_coeff
2274                 || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
2275       const int context = vp9_get_pred_context(cm, xd, PRED_TX_SIZE);
2276       if (bsize >= BLOCK_SIZE_SB32X32) {
2277         cm->fc.tx_count_32x32p[context][mbmi->txfm_size]++;
2278       } else if (bsize >= BLOCK_SIZE_MB16X16) {
2279         cm->fc.tx_count_16x16p[context][mbmi->txfm_size]++;
2280       } else {
2281         cm->fc.tx_count_8x8p[context][mbmi->txfm_size]++;
2282       }
2283     } else {
2284       int x, y;
2285       TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
2286       // The new intra coding scheme requires no change of transform size
2287       if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
2288         if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32)
2289           sz = TX_16X16;
2290         if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
2291           sz = TX_8X8;
2292         if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8)
2293           sz = TX_4X4;
2294       } else if (bsize >= BLOCK_SIZE_SB8X8) {
2295         sz = mbmi->txfm_size;
2296       } else {
2297         sz = TX_4X4;
2298       }
2299
2300       for (y = 0; y < bh; y++) {
2301         for (x = 0; x < bw; x++) {
2302           if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) {
2303             mi[mis * y + x].mbmi.txfm_size = sz;
2304           }
2305         }
2306       }
2307     }
2308   }
2309 }