granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encoder.c

   1 /*
   2  * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <limits.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_config.h"
  18 #include "./vpx_dsp_rtcd.h"
  19 #include "./vpx_scale_rtcd.h"
  20 #include "vpx_dsp/psnr.h"
  21 #include "vpx_dsp/vpx_dsp_common.h"
  22 #include "vpx_dsp/vpx_filter.h"
  23 #if CONFIG_INTERNAL_STATS
  24 #include "vpx_dsp/ssim.h"
  25 #endif
  26 #include "vpx_ports/mem.h"
  27 #include "vpx_ports/system_state.h"
  28 #include "vpx_ports/vpx_timer.h"
  29 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  30 #include "vpx_util/vpx_debug_util.h"
  31 #endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  32
  33 #include "vp9/common/vp9_alloccommon.h"
  34 #include "vp9/common/vp9_filter.h"
  35 #include "vp9/common/vp9_idct.h"
  36 #if CONFIG_NON_GREEDY_MV
  37 #include "vp9/common/vp9_mvref_common.h"
  38 #endif
  39 #if CONFIG_VP9_POSTPROC
  40 #include "vp9/common/vp9_postproc.h"
  41 #endif
  42 #include "vp9/common/vp9_reconinter.h"
  43 #include "vp9/common/vp9_reconintra.h"
  44 #include "vp9/common/vp9_tile_common.h"
  45 #include "vp9/common/vp9_scan.h"
  46
  47 #if !CONFIG_REALTIME_ONLY
  48 #include "vp9/encoder/vp9_alt_ref_aq.h"
  49 #include "vp9/encoder/vp9_aq_360.h"
  50 #include "vp9/encoder/vp9_aq_complexity.h"
  51 #endif
  52 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  53 #if !CONFIG_REALTIME_ONLY
  54 #include "vp9/encoder/vp9_aq_variance.h"
  55 #endif
  56 #include "vp9/encoder/vp9_bitstream.h"
  57 #if CONFIG_INTERNAL_STATS
  58 #include "vp9/encoder/vp9_blockiness.h"
  59 #endif
  60 #include "vp9/encoder/vp9_context_tree.h"
  61 #include "vp9/encoder/vp9_encodeframe.h"
  62 #include "vp9/encoder/vp9_encodemb.h"
  63 #include "vp9/encoder/vp9_encodemv.h"
  64 #include "vp9/encoder/vp9_encoder.h"
  65 #include "vp9/encoder/vp9_ethread.h"
  66 #include "vp9/encoder/vp9_extend.h"
  67 #include "vp9/encoder/vp9_firstpass.h"
  68 #include "vp9/encoder/vp9_mbgraph.h"
  69 #if CONFIG_NON_GREEDY_MV
  70 #include "vp9/encoder/vp9_mcomp.h"
  71 #endif
  72 #include "vp9/encoder/vp9_multi_thread.h"
  73 #include "vp9/encoder/vp9_noise_estimate.h"
  74 #include "vp9/encoder/vp9_picklpf.h"
  75 #include "vp9/encoder/vp9_ratectrl.h"
  76 #include "vp9/encoder/vp9_rd.h"
  77 #include "vp9/encoder/vp9_resize.h"
  78 #include "vp9/encoder/vp9_segmentation.h"
  79 #include "vp9/encoder/vp9_skin_detection.h"
  80 #include "vp9/encoder/vp9_speed_features.h"
  81 #include "vp9/encoder/vp9_svc_layercontext.h"
  82 #include "vp9/encoder/vp9_temporal_filter.h"
  83 #include "vp9/vp9_cx_iface.h"
  84
  85 #define AM_SEGMENT_ID_INACTIVE 7
  86 #define AM_SEGMENT_ID_ACTIVE 0
  87
  88 // Whether to use high precision mv for altref computation.
  89 #define ALTREF_HIGH_PRECISION_MV 1
  90
  91 // Q threshold for high precision mv. Choose a very high value for now so that
  92 // HIGH_PRECISION is always chosen.
  93 #define HIGH_PRECISION_MV_QTHRESH 200
  94
  95 #define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
  96 #define FRAME_RATE_FACTOR 8
  97
  98 #ifdef OUTPUT_YUV_DENOISED
  99 FILE *yuv_denoised_file = NULL;
 100 #endif
 101 #ifdef OUTPUT_YUV_SKINMAP
 102 static FILE *yuv_skinmap_file = NULL;
 103 #endif
 104 #ifdef OUTPUT_YUV_REC
 105 FILE *yuv_rec_file;
 106 #endif
 107 #ifdef OUTPUT_YUV_SVC_SRC
 108 FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
 109 #endif
 110
 111 #if 0
 112 FILE *framepsnr;
 113 FILE *kf_list;
 114 FILE *keyfile;
 115 #endif
 116
 117 #ifdef ENABLE_KF_DENOISE
 118 // Test condition for spatial denoise of source.
 119 static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 120   VP9_COMMON *const cm = &cpi->common;
 121   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 122
 123   return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) &&
 124          frame_is_intra_only(cm);
 125 }
 126 #endif
 127
 128 #if CONFIG_VP9_HIGHBITDEPTH
 129 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 130                          TX_SIZE tx_size);
 131 #endif
 132 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 133                   TX_SIZE tx_size);
 134
 135 #if !CONFIG_REALTIME_ONLY
 136 // compute adaptive threshold for skip recoding
 137 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
 138   const VP9_COMMON *const cm = &cpi->common;
 139   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 140   const int frame_size = (cm->width * cm->height) >> 10;
 141   const int bitrate = (int)(oxcf->target_bandwidth >> 10);
 142   const int qindex_factor = cm->base_qindex + (MAXQ >> 1);
 143
 144   // This equation makes the threshold adaptive to frame size.
 145   // Coding gain obtained by recoding comes from alternate frames of large
 146   // content change. We skip recoding if the difference of previous and current
 147   // frame context probability model is less than a certain threshold.
 148   // The first component is the most critical part to guarantee adaptivity.
 149   // Other parameters are estimated based on normal setting of hd resolution
 150   // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50
 151   const int thresh =
 152       ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) *
 153        qindex_factor) >>
 154       9;
 155
 156   return thresh;
 157 }
 158
 159 // compute the total cost difference between current
 160 // and previous frame context prob model.
 161 static int compute_context_model_diff(const VP9_COMMON *const cm) {
 162   const FRAME_CONTEXT *const pre_fc =
 163       &cm->frame_contexts[cm->frame_context_idx];
 164   const FRAME_CONTEXT *const cur_fc = cm->fc;
 165   const FRAME_COUNTS *counts = &cm->counts;
 166   vpx_prob pre_last_prob, cur_last_prob;
 167   int diff = 0;
 168   int i, j, k, l, m, n;
 169
 170   // y_mode_prob
 171   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
 172     for (j = 0; j < INTRA_MODES - 1; ++j) {
 173       diff += (int)counts->y_mode[i][j] *
 174               (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]);
 175     }
 176     pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2];
 177     cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2];
 178
 179     diff += (int)counts->y_mode[i][INTRA_MODES - 1] *
 180             (pre_last_prob - cur_last_prob);
 181   }
 182
 183   // uv_mode_prob
 184   for (i = 0; i < INTRA_MODES; ++i) {
 185     for (j = 0; j < INTRA_MODES - 1; ++j) {
 186       diff += (int)counts->uv_mode[i][j] *
 187               (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]);
 188     }
 189     pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2];
 190     cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2];
 191
 192     diff += (int)counts->uv_mode[i][INTRA_MODES - 1] *
 193             (pre_last_prob - cur_last_prob);
 194   }
 195
 196   // partition_prob
 197   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
 198     for (j = 0; j < PARTITION_TYPES - 1; ++j) {
 199       diff += (int)counts->partition[i][j] *
 200               (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]);
 201     }
 202     pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2];
 203     cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2];
 204
 205     diff += (int)counts->partition[i][PARTITION_TYPES - 1] *
 206             (pre_last_prob - cur_last_prob);
 207   }
 208
 209   // coef_probs
 210   for (i = 0; i < TX_SIZES; ++i) {
 211     for (j = 0; j < PLANE_TYPES; ++j) {
 212       for (k = 0; k < REF_TYPES; ++k) {
 213         for (l = 0; l < COEF_BANDS; ++l) {
 214           for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) {
 215             for (n = 0; n < UNCONSTRAINED_NODES; ++n) {
 216               diff += (int)counts->coef[i][j][k][l][m][n] *
 217                       (pre_fc->coef_probs[i][j][k][l][m][n] -
 218                        cur_fc->coef_probs[i][j][k][l][m][n]);
 219             }
 220
 221             pre_last_prob =
 222                 MAX_PROB -
 223                 pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 224             cur_last_prob =
 225                 MAX_PROB -
 226                 cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 227
 228             diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] *
 229                     (pre_last_prob - cur_last_prob);
 230           }
 231         }
 232       }
 233     }
 234   }
 235
 236   // switchable_interp_prob
 237   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
 238     for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) {
 239       diff += (int)counts->switchable_interp[i][j] *
 240               (pre_fc->switchable_interp_prob[i][j] -
 241                cur_fc->switchable_interp_prob[i][j]);
 242     }
 243     pre_last_prob =
 244         MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 245     cur_last_prob =
 246         MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 247
 248     diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] *
 249             (pre_last_prob - cur_last_prob);
 250   }
 251
 252   // inter_mode_probs
 253   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
 254     for (j = 0; j < INTER_MODES - 1; ++j) {
 255       diff += (int)counts->inter_mode[i][j] *
 256               (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]);
 257     }
 258     pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2];
 259     cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2];
 260
 261     diff += (int)counts->inter_mode[i][INTER_MODES - 1] *
 262             (pre_last_prob - cur_last_prob);
 263   }
 264
 265   // intra_inter_prob
 266   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
 267     diff += (int)counts->intra_inter[i][0] *
 268             (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]);
 269
 270     pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i];
 271     cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i];
 272
 273     diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob);
 274   }
 275
 276   // comp_inter_prob
 277   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
 278     diff += (int)counts->comp_inter[i][0] *
 279             (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]);
 280
 281     pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i];
 282     cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i];
 283
 284     diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob);
 285   }
 286
 287   // single_ref_prob
 288   for (i = 0; i < REF_CONTEXTS; ++i) {
 289     for (j = 0; j < 2; ++j) {
 290       diff += (int)counts->single_ref[i][j][0] *
 291               (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]);
 292
 293       pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j];
 294       cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j];
 295
 296       diff +=
 297           (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob);
 298     }
 299   }
 300
 301   // comp_ref_prob
 302   for (i = 0; i < REF_CONTEXTS; ++i) {
 303     diff += (int)counts->comp_ref[i][0] *
 304             (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]);
 305
 306     pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i];
 307     cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i];
 308
 309     diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob);
 310   }
 311
 312   // tx_probs
 313   for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
 314     // p32x32
 315     for (j = 0; j < TX_SIZES - 1; ++j) {
 316       diff += (int)counts->tx.p32x32[i][j] *
 317               (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]);
 318     }
 319     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 320     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 321
 322     diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] *
 323             (pre_last_prob - cur_last_prob);
 324
 325     // p16x16
 326     for (j = 0; j < TX_SIZES - 2; ++j) {
 327       diff += (int)counts->tx.p16x16[i][j] *
 328               (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]);
 329     }
 330     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 331     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 332
 333     diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] *
 334             (pre_last_prob - cur_last_prob);
 335
 336     // p8x8
 337     for (j = 0; j < TX_SIZES - 3; ++j) {
 338       diff += (int)counts->tx.p8x8[i][j] *
 339               (pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]);
 340     }
 341     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 342     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 343
 344     diff +=
 345         (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob);
 346   }
 347
 348   // skip_probs
 349   for (i = 0; i < SKIP_CONTEXTS; ++i) {
 350     diff += (int)counts->skip[i][0] *
 351             (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]);
 352
 353     pre_last_prob = MAX_PROB - pre_fc->skip_probs[i];
 354     cur_last_prob = MAX_PROB - cur_fc->skip_probs[i];
 355
 356     diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob);
 357   }
 358
 359   // mv
 360   for (i = 0; i < MV_JOINTS - 1; ++i) {
 361     diff += (int)counts->mv.joints[i] *
 362             (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]);
 363   }
 364   pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2];
 365   cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2];
 366
 367   diff +=
 368       (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob);
 369
 370   for (i = 0; i < 2; ++i) {
 371     const nmv_component_counts *nmv_count = &counts->mv.comps[i];
 372     const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i];
 373     const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i];
 374
 375     // sign
 376     diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign);
 377
 378     pre_last_prob = MAX_PROB - pre_nmv_prob->sign;
 379     cur_last_prob = MAX_PROB - cur_nmv_prob->sign;
 380
 381     diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob);
 382
 383     // classes
 384     for (j = 0; j < MV_CLASSES - 1; ++j) {
 385       diff += (int)nmv_count->classes[j] *
 386               (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]);
 387     }
 388     pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2];
 389     cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2];
 390
 391     diff += (int)nmv_count->classes[MV_CLASSES - 1] *
 392             (pre_last_prob - cur_last_prob);
 393
 394     // class0
 395     for (j = 0; j < CLASS0_SIZE - 1; ++j) {
 396       diff += (int)nmv_count->class0[j] *
 397               (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]);
 398     }
 399     pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2];
 400     cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2];
 401
 402     diff += (int)nmv_count->class0[CLASS0_SIZE - 1] *
 403             (pre_last_prob - cur_last_prob);
 404
 405     // bits
 406     for (j = 0; j < MV_OFFSET_BITS; ++j) {
 407       diff += (int)nmv_count->bits[j][0] *
 408               (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]);
 409
 410       pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j];
 411       cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j];
 412
 413       diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob);
 414     }
 415
 416     // class0_fp
 417     for (j = 0; j < CLASS0_SIZE; ++j) {
 418       for (k = 0; k < MV_FP_SIZE - 1; ++k) {
 419         diff += (int)nmv_count->class0_fp[j][k] *
 420                 (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]);
 421       }
 422       pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 423       cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 424
 425       diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] *
 426               (pre_last_prob - cur_last_prob);
 427     }
 428
 429     // fp
 430     for (j = 0; j < MV_FP_SIZE - 1; ++j) {
 431       diff +=
 432           (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]);
 433     }
 434     pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2];
 435     cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2];
 436
 437     diff +=
 438         (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob);
 439
 440     // class0_hp
 441     diff += (int)nmv_count->class0_hp[0] *
 442             (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp);
 443
 444     pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp;
 445     cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp;
 446
 447     diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob);
 448
 449     // hp
 450     diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp);
 451
 452     pre_last_prob = MAX_PROB - pre_nmv_prob->hp;
 453     cur_last_prob = MAX_PROB - cur_nmv_prob->hp;
 454
 455     diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob);
 456   }
 457
 458   return -diff;
 459 }
 460 #endif  // !CONFIG_REALTIME_ONLY
 461
 462 // Test for whether to calculate metrics for the frame.
 463 static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
 464   const VP9_COMMON *const cm = &cpi->common;
 465   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 466
 467   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
 468 }
 469
 470 /* clang-format off */
 471 const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
 472   //         sample rate    size   breadth  bitrate  cpb
 473   { LEVEL_1,   829440,      36864,    512,   200,    400,    2, 1,  4,  8 },
 474   { LEVEL_1_1, 2764800,     73728,    768,   800,    1000,   2, 1,  4,  8 },
 475   { LEVEL_2,   4608000,     122880,   960,   1800,   1500,   2, 1,  4,  8 },
 476   { LEVEL_2_1, 9216000,     245760,   1344,  3600,   2800,   2, 2,  4,  8 },
 477   { LEVEL_3,   20736000,    552960,   2048,  7200,   6000,   2, 4,  4,  8 },
 478   { LEVEL_3_1, 36864000,    983040,   2752,  12000,  10000,  2, 4,  4,  8 },
 479   { LEVEL_4,   83558400,    2228224,  4160,  18000,  16000,  4, 4,  4,  8 },
 480   { LEVEL_4_1, 160432128,   2228224,  4160,  30000,  18000,  4, 4,  5,  6 },
 481   { LEVEL_5,   311951360,   8912896,  8384,  60000,  36000,  6, 8,  6,  4 },
 482   { LEVEL_5_1, 588251136,   8912896,  8384,  120000, 46000,  8, 8,  10, 4 },
 483   // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
 484   // they are finalized (currently tentative).
 485   { LEVEL_5_2, 1176502272,  8912896,  8384,  180000, 90000,  8, 8,  10, 4 },
 486   { LEVEL_6,   1176502272,  35651584, 16832, 180000, 90000,  8, 16, 10, 4 },
 487   { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
 488   { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
 489 };
 490 /* clang-format on */
 491
 492 static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
 493   "The average bit-rate is too high.",
 494   "The picture size is too large.",
 495   "The picture width/height is too large.",
 496   "The luma sample rate is too large.",
 497   "The CPB size is too large.",
 498   "The compression ratio is too small",
 499   "Too many column tiles are used.",
 500   "The alt-ref distance is too small.",
 501   "Too many reference buffers are used."
 502 };
 503
 504 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
 505   switch (mode) {
 506     case NORMAL:
 507       *hr = 1;
 508       *hs = 1;
 509       break;
 510     case FOURFIVE:
 511       *hr = 4;
 512       *hs = 5;
 513       break;
 514     case THREEFIVE:
 515       *hr = 3;
 516       *hs = 5;
 517       break;
 518     default:
 519       assert(mode == ONETWO);
 520       *hr = 1;
 521       *hs = 2;
 522       break;
 523   }
 524 }
 525
 526 // Mark all inactive blocks as active. Other segmentation features may be set
 527 // so memset cannot be used, instead only inactive blocks should be reset.
 528 static void suppress_active_map(VP9_COMP *cpi) {
 529   unsigned char *const seg_map = cpi->segmentation_map;
 530
 531   if (cpi->active_map.enabled || cpi->active_map.update) {
 532     const int rows = cpi->common.mi_rows;
 533     const int cols = cpi->common.mi_cols;
 534     int i;
 535
 536     for (i = 0; i < rows * cols; ++i)
 537       if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
 538         seg_map[i] = AM_SEGMENT_ID_ACTIVE;
 539   }
 540 }
 541
 542 static void apply_active_map(VP9_COMP *cpi) {
 543   struct segmentation *const seg = &cpi->common.seg;
 544   unsigned char *const seg_map = cpi->segmentation_map;
 545   const unsigned char *const active_map = cpi->active_map.map;
 546   int i;
 547
 548   assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
 549
 550   if (frame_is_intra_only(&cpi->common)) {
 551     cpi->active_map.enabled = 0;
 552     cpi->active_map.update = 1;
 553   }
 554
 555   if (cpi->active_map.update) {
 556     if (cpi->active_map.enabled) {
 557       for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
 558         if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
 559       vp9_enable_segmentation(seg);
 560       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 561       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 562       // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
 563       // filter level being zero regardless of the value of seg->abs_delta.
 564       vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
 565                       -MAX_LOOP_FILTER);
 566     } else {
 567       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 568       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 569       if (seg->enabled) {
 570         seg->update_data = 1;
 571         seg->update_map = 1;
 572       }
 573     }
 574     cpi->active_map.update = 0;
 575   }
 576 }
 577
 578 static void apply_roi_map(VP9_COMP *cpi) {
 579   VP9_COMMON *cm = &cpi->common;
 580   struct segmentation *const seg = &cm->seg;
 581   vpx_roi_map_t *roi = &cpi->roi;
 582   const int *delta_q = roi->delta_q;
 583   const int *delta_lf = roi->delta_lf;
 584   const int *skip = roi->skip;
 585   int ref_frame[8];
 586   int internal_delta_q[MAX_SEGMENTS];
 587   int i;
 588   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
 589                                     VP9_ALT_FLAG };
 590
 591   // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
 592   // realtime mode.
 593   if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
 594   if (!roi->enabled) return;
 595
 596   memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
 597
 598   vp9_enable_segmentation(seg);
 599   vp9_clearall_segfeatures(seg);
 600   // Select delta coding method;
 601   seg->abs_delta = SEGMENT_DELTADATA;
 602
 603   memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
 604
 605   for (i = 0; i < MAX_SEGMENTS; ++i) {
 606     // Translate the external delta q values to internal values.
 607     internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
 608     if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
 609     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
 610     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
 611     if (internal_delta_q[i] != 0) {
 612       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
 613       vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
 614     }
 615     if (delta_lf[i] != 0) {
 616       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
 617       vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
 618     }
 619     if (skip[i] != 0) {
 620       vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
 621       vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
 622     }
 623     if (ref_frame[i] >= 0) {
 624       int valid_ref = 1;
 625       // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
 626       if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
 627         valid_ref = 0;
 628       // If GOLDEN is selected, make sure it's set as reference.
 629       if (ref_frame[i] == GOLDEN_FRAME &&
 630           !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
 631         valid_ref = 0;
 632       }
 633       // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
 634       // same reference.
 635       if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
 636         ref_frame[i] = LAST_FRAME;
 637       if (valid_ref) {
 638         vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
 639         vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
 640       }
 641     }
 642   }
 643   roi->enabled = 1;
 644 }
 645
 646 static void init_level_info(Vp9LevelInfo *level_info) {
 647   Vp9LevelStats *const level_stats = &level_info->level_stats;
 648   Vp9LevelSpec *const level_spec = &level_info->level_spec;
 649
 650   memset(level_stats, 0, sizeof(*level_stats));
 651   memset(level_spec, 0, sizeof(*level_spec));
 652   level_spec->level = LEVEL_UNKNOWN;
 653   level_spec->min_altref_distance = INT_MAX;
 654 }
 655
 656 static int check_seg_range(int seg_data[8], int range) {
 657   return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
 658            abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
 659            abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
 660            abs(seg_data[6]) > range || abs(seg_data[7]) > range);
 661 }
 662
 663 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
 664   int i;
 665   const Vp9LevelSpec *this_level;
 666
 667   vpx_clear_system_state();
 668
 669   for (i = 0; i < VP9_LEVELS; ++i) {
 670     this_level = &vp9_level_defs[i];
 671     if ((double)level_spec->max_luma_sample_rate >
 672             (double)this_level->max_luma_sample_rate *
 673                 (1 + SAMPLE_RATE_GRACE_P) ||
 674         level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
 675         level_spec->max_luma_picture_breadth >
 676             this_level->max_luma_picture_breadth ||
 677         level_spec->average_bitrate > this_level->average_bitrate ||
 678         level_spec->max_cpb_size > this_level->max_cpb_size ||
 679         level_spec->compression_ratio < this_level->compression_ratio ||
 680         level_spec->max_col_tiles > this_level->max_col_tiles ||
 681         level_spec->min_altref_distance < this_level->min_altref_distance ||
 682         level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
 683       continue;
 684     break;
 685   }
 686   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 687 }
 688
 689 int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
 690                     unsigned int cols, int delta_q[8], int delta_lf[8],
 691                     int skip[8], int ref_frame[8]) {
 692   VP9_COMMON *cm = &cpi->common;
 693   vpx_roi_map_t *roi = &cpi->roi;
 694   const int range = 63;
 695   const int ref_frame_range = 3;  // Alt-ref
 696   const int skip_range = 1;
 697   const int frame_rows = cpi->common.mi_rows;
 698   const int frame_cols = cpi->common.mi_cols;
 699
 700   // Check number of rows and columns match
 701   if (frame_rows != (int)rows || frame_cols != (int)cols) {
 702     return -1;
 703   }
 704
 705   if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
 706       !check_seg_range(ref_frame, ref_frame_range) ||
 707       !check_seg_range(skip, skip_range))
 708     return -1;
 709
 710   // Also disable segmentation if no deltas are specified.
 711   if (!map ||
 712       (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
 713          delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
 714          delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
 715          delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
 716          skip[5] | skip[6] | skip[7]) &&
 717        (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
 718         ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
 719         ref_frame[6] == -1 && ref_frame[7] == -1))) {
 720     vp9_disable_segmentation(&cm->seg);
 721     cpi->roi.enabled = 0;
 722     return 0;
 723   }
 724
 725   if (roi->roi_map) {
 726     vpx_free(roi->roi_map);
 727     roi->roi_map = NULL;
 728   }
 729   CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
 730
 731   // Copy to ROI structure in the compressor.
 732   memcpy(roi->roi_map, map, rows * cols);
 733   memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
 734   memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
 735   memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
 736   memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
 737   roi->enabled = 1;
 738   roi->rows = rows;
 739   roi->cols = cols;
 740
 741   return 0;
 742 }
 743
 744 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 745                        int cols) {
 746   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
 747     unsigned char *const active_map_8x8 = cpi->active_map.map;
 748     const int mi_rows = cpi->common.mi_rows;
 749     const int mi_cols = cpi->common.mi_cols;
 750     cpi->active_map.update = 1;
 751     if (new_map_16x16) {
 752       int r, c;
 753       for (r = 0; r < mi_rows; ++r) {
 754         for (c = 0; c < mi_cols; ++c) {
 755           active_map_8x8[r * mi_cols + c] =
 756               new_map_16x16[(r >> 1) * cols + (c >> 1)]
 757                   ? AM_SEGMENT_ID_ACTIVE
 758                   : AM_SEGMENT_ID_INACTIVE;
 759         }
 760       }
 761       cpi->active_map.enabled = 1;
 762     } else {
 763       cpi->active_map.enabled = 0;
 764     }
 765     return 0;
 766   } else {
 767     return -1;
 768   }
 769 }
 770
 771 int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 772                        int cols) {
 773   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
 774       new_map_16x16) {
 775     unsigned char *const seg_map_8x8 = cpi->segmentation_map;
 776     const int mi_rows = cpi->common.mi_rows;
 777     const int mi_cols = cpi->common.mi_cols;
 778     memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
 779     if (cpi->active_map.enabled) {
 780       int r, c;
 781       for (r = 0; r < mi_rows; ++r) {
 782         for (c = 0; c < mi_cols; ++c) {
 783           // Cyclic refresh segments are considered active despite not having
 784           // AM_SEGMENT_ID_ACTIVE
 785           new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
 786               seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
 787         }
 788       }
 789     }
 790     return 0;
 791   } else {
 792     return -1;
 793   }
 794 }
 795
 796 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
 797   MACROBLOCK *const mb = &cpi->td.mb;
 798   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
 799   if (cpi->common.allow_high_precision_mv) {
 800     mb->mvcost = mb->nmvcost_hp;
 801     mb->mvsadcost = mb->nmvsadcost_hp;
 802   } else {
 803     mb->mvcost = mb->nmvcost;
 804     mb->mvsadcost = mb->nmvsadcost;
 805   }
 806 }
 807
 808 static void setup_frame(VP9_COMP *cpi) {
 809   VP9_COMMON *const cm = &cpi->common;
 810   // Set up entropy context depending on frame type. The decoder mandates
 811   // the use of the default context, index 0, for keyframes and inter
 812   // frames where the error_resilient_mode or intra_only flag is set. For
 813   // other inter-frames the encoder currently uses only two contexts;
 814   // context 1 for ALTREF frames and context 0 for the others.
 815   if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
 816     vp9_setup_past_independence(cm);
 817   } else {
 818     if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
 819   }
 820
 821   // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF
 822   // case. Need some further investigation on if we could apply this to single
 823   // layer ARF case as well.
 824   if (cpi->multi_layer_arf && !cpi->use_svc) {
 825     GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 826     const int gf_group_index = gf_group->index;
 827     const int boost_frame =
 828         !cpi->rc.is_src_frame_alt_ref &&
 829         (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
 830
 831     // frame_context_idx           Frame Type
 832     //        0              Intra only frame, base layer ARF
 833     //        1              ARFs with layer depth = 2,3
 834     //        2              ARFs with layer depth > 3
 835     //        3              Non-boosted frames
 836     if (frame_is_intra_only(cm)) {
 837       cm->frame_context_idx = 0;
 838     } else if (boost_frame) {
 839       if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
 840         cm->frame_context_idx = 0;
 841       else if (gf_group->layer_depth[gf_group_index] <= 3)
 842         cm->frame_context_idx = 1;
 843       else
 844         cm->frame_context_idx = 2;
 845     } else {
 846       cm->frame_context_idx = 3;
 847     }
 848   }
 849
 850   if (cm->frame_type == KEY_FRAME) {
 851     cpi->refresh_golden_frame = 1;
 852     cpi->refresh_alt_ref_frame = 1;
 853     vp9_zero(cpi->interp_filter_selected);
 854   } else {
 855     *cm->fc = cm->frame_contexts[cm->frame_context_idx];
 856     vp9_zero(cpi->interp_filter_selected[0]);
 857   }
 858 }
 859
 860 static void vp9_enc_setup_mi(VP9_COMMON *cm) {
 861   int i;
 862   cm->mi = cm->mip + cm->mi_stride + 1;
 863   memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
 864   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 865   // Clear top border row
 866   memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
 867   // Clear left border column
 868   for (i = 1; i < cm->mi_rows + 1; ++i)
 869     memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
 870
 871   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 872   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 873
 874   memset(cm->mi_grid_base, 0,
 875          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 876 }
 877
 878 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
 879   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
 880   if (!cm->mip) return 1;
 881   cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
 882   if (!cm->prev_mip) return 1;
 883   cm->mi_alloc_size = mi_size;
 884
 885   cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 886   if (!cm->mi_grid_base) return 1;
 887   cm->prev_mi_grid_base =
 888       (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 889   if (!cm->prev_mi_grid_base) return 1;
 890
 891   return 0;
 892 }
 893
 894 static void vp9_enc_free_mi(VP9_COMMON *cm) {
 895   vpx_free(cm->mip);
 896   cm->mip = NULL;
 897   vpx_free(cm->prev_mip);
 898   cm->prev_mip = NULL;
 899   vpx_free(cm->mi_grid_base);
 900   cm->mi_grid_base = NULL;
 901   vpx_free(cm->prev_mi_grid_base);
 902   cm->prev_mi_grid_base = NULL;
 903   cm->mi_alloc_size = 0;
 904 }
 905
 906 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
 907   // Current mip will be the prev_mip for the next frame.
 908   MODE_INFO **temp_base = cm->prev_mi_grid_base;
 909   MODE_INFO *temp = cm->prev_mip;
 910
 911   // Skip update prev_mi frame in show_existing_frame mode.
 912   if (cm->show_existing_frame) return;
 913
 914   cm->prev_mip = cm->mip;
 915   cm->mip = temp;
 916
 917   // Update the upper left visible macroblock ptrs.
 918   cm->mi = cm->mip + cm->mi_stride + 1;
 919   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 920
 921   cm->prev_mi_grid_base = cm->mi_grid_base;
 922   cm->mi_grid_base = temp_base;
 923   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 924   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 925 }
 926
 927 void vp9_initialize_enc(void) {
 928   static volatile int init_done = 0;
 929
 930   if (!init_done) {
 931     vp9_rtcd();
 932     vpx_dsp_rtcd();
 933     vpx_scale_rtcd();
 934     vp9_init_intra_predictors();
 935     vp9_init_me_luts();
 936     vp9_rc_init_minq_luts();
 937     vp9_entropy_mv_init();
 938 #if !CONFIG_REALTIME_ONLY
 939     vp9_temporal_filter_init();
 940 #endif
 941     init_done = 1;
 942   }
 943 }
 944
 945 static void dealloc_compressor_data(VP9_COMP *cpi) {
 946   VP9_COMMON *const cm = &cpi->common;
 947   int i;
 948
 949   vpx_free(cpi->mbmi_ext_base);
 950   cpi->mbmi_ext_base = NULL;
 951
 952   vpx_free(cpi->tile_data);
 953   cpi->tile_data = NULL;
 954
 955   vpx_free(cpi->segmentation_map);
 956   cpi->segmentation_map = NULL;
 957   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
 958   cpi->coding_context.last_frame_seg_map_copy = NULL;
 959
 960   vpx_free(cpi->nmvcosts[0]);
 961   vpx_free(cpi->nmvcosts[1]);
 962   cpi->nmvcosts[0] = NULL;
 963   cpi->nmvcosts[1] = NULL;
 964
 965   vpx_free(cpi->nmvcosts_hp[0]);
 966   vpx_free(cpi->nmvcosts_hp[1]);
 967   cpi->nmvcosts_hp[0] = NULL;
 968   cpi->nmvcosts_hp[1] = NULL;
 969
 970   vpx_free(cpi->nmvsadcosts[0]);
 971   vpx_free(cpi->nmvsadcosts[1]);
 972   cpi->nmvsadcosts[0] = NULL;
 973   cpi->nmvsadcosts[1] = NULL;
 974
 975   vpx_free(cpi->nmvsadcosts_hp[0]);
 976   vpx_free(cpi->nmvsadcosts_hp[1]);
 977   cpi->nmvsadcosts_hp[0] = NULL;
 978   cpi->nmvsadcosts_hp[1] = NULL;
 979
 980   vpx_free(cpi->skin_map);
 981   cpi->skin_map = NULL;
 982
 983   vpx_free(cpi->prev_partition);
 984   cpi->prev_partition = NULL;
 985
 986   vpx_free(cpi->svc.prev_partition_svc);
 987   cpi->svc.prev_partition_svc = NULL;
 988
 989   vpx_free(cpi->prev_segment_id);
 990   cpi->prev_segment_id = NULL;
 991
 992   vpx_free(cpi->prev_variance_low);
 993   cpi->prev_variance_low = NULL;
 994
 995   vpx_free(cpi->copied_frame_cnt);
 996   cpi->copied_frame_cnt = NULL;
 997
 998   vpx_free(cpi->content_state_sb_fd);
 999   cpi->content_state_sb_fd = NULL;
1000
1001   vpx_free(cpi->count_arf_frame_usage);
1002   cpi->count_arf_frame_usage = NULL;
1003   vpx_free(cpi->count_lastgolden_frame_usage);
1004   cpi->count_lastgolden_frame_usage = NULL;
1005
1006   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1007   cpi->cyclic_refresh = NULL;
1008
1009   vpx_free(cpi->active_map.map);
1010   cpi->active_map.map = NULL;
1011
1012   vpx_free(cpi->roi.roi_map);
1013   cpi->roi.roi_map = NULL;
1014
1015   vpx_free(cpi->consec_zero_mv);
1016   cpi->consec_zero_mv = NULL;
1017
1018   vpx_free(cpi->mb_wiener_variance);
1019   cpi->mb_wiener_variance = NULL;
1020
1021   vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
1022   cpi->mi_ssim_rdmult_scaling_factors = NULL;
1023
1024 #if CONFIG_RATE_CTRL
1025   free_partition_info(cpi);
1026   free_motion_vector_info(cpi);
1027 #endif
1028
1029   vp9_free_ref_frame_buffers(cm->buffer_pool);
1030 #if CONFIG_VP9_POSTPROC
1031   vp9_free_postproc_buffers(cm);
1032 #endif
1033   vp9_free_context_buffers(cm);
1034
1035   vpx_free_frame_buffer(&cpi->last_frame_uf);
1036   vpx_free_frame_buffer(&cpi->scaled_source);
1037   vpx_free_frame_buffer(&cpi->scaled_last_source);
1038   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
1039 #ifdef ENABLE_KF_DENOISE
1040   vpx_free_frame_buffer(&cpi->raw_unscaled_source);
1041   vpx_free_frame_buffer(&cpi->raw_scaled_source);
1042 #endif
1043
1044   vp9_lookahead_destroy(cpi->lookahead);
1045
1046   vpx_free(cpi->tile_tok[0][0]);
1047   cpi->tile_tok[0][0] = 0;
1048
1049   vpx_free(cpi->tplist[0][0]);
1050   cpi->tplist[0][0] = NULL;
1051
1052   vp9_free_pc_tree(&cpi->td);
1053
1054   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
1055     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
1056     vpx_free(lc->rc_twopass_stats_in.buf);
1057     lc->rc_twopass_stats_in.buf = NULL;
1058     lc->rc_twopass_stats_in.sz = 0;
1059   }
1060
1061   if (cpi->source_diff_var != NULL) {
1062     vpx_free(cpi->source_diff_var);
1063     cpi->source_diff_var = NULL;
1064   }
1065
1066   for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
1067     vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
1068   }
1069   memset(&cpi->svc.scaled_frames[0], 0,
1070          MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
1071
1072   vpx_free_frame_buffer(&cpi->svc.scaled_temp);
1073   memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
1074
1075   vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
1076   memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
1077
1078   vp9_free_svc_cyclic_refresh(cpi);
1079 }
1080
1081 static void save_coding_context(VP9_COMP *cpi) {
1082   CODING_CONTEXT *const cc = &cpi->coding_context;
1083   VP9_COMMON *cm = &cpi->common;
1084
1085   // Stores a snapshot of key state variables which can subsequently be
1086   // restored with a call to vp9_restore_coding_context. These functions are
1087   // intended for use in a re-code loop in vp9_compress_frame where the
1088   // quantizer value is adjusted between loop iterations.
1089   vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
1090
1091   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
1092          MV_VALS * sizeof(*cpi->nmvcosts[0]));
1093   memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
1094          MV_VALS * sizeof(*cpi->nmvcosts[1]));
1095   memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
1096          MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
1097   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
1098          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
1099
1100   vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
1101
1102   memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
1103          (cm->mi_rows * cm->mi_cols));
1104
1105   vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
1106   vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
1107
1108   cc->fc = *cm->fc;
1109 }
1110
1111 static void restore_coding_context(VP9_COMP *cpi) {
1112   CODING_CONTEXT *const cc = &cpi->coding_context;
1113   VP9_COMMON *cm = &cpi->common;
1114
1115   // Restore key state variables to the snapshot state stored in the
1116   // previous call to vp9_save_coding_context.
1117   vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
1118
1119   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
1120   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
1121   memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
1122          MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
1123   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
1124          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
1125
1126   vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
1127
1128   memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
1129          (cm->mi_rows * cm->mi_cols));
1130
1131   vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
1132   vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
1133
1134   *cm->fc = cc->fc;
1135 }
1136
1137 #if !CONFIG_REALTIME_ONLY
1138 static void configure_static_seg_features(VP9_COMP *cpi) {
1139   VP9_COMMON *const cm = &cpi->common;
1140   const RATE_CONTROL *const rc = &cpi->rc;
1141   struct segmentation *const seg = &cm->seg;
1142
1143   int high_q = (int)(rc->avg_q > 48.0);
1144   int qi_delta;
1145
1146   // Disable and clear down for KF
1147   if (cm->frame_type == KEY_FRAME) {
1148     // Clear down the global segmentation map
1149     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1150     seg->update_map = 0;
1151     seg->update_data = 0;
1152     cpi->static_mb_pct = 0;
1153
1154     // Disable segmentation
1155     vp9_disable_segmentation(seg);
1156
1157     // Clear down the segment features.
1158     vp9_clearall_segfeatures(seg);
1159   } else if (cpi->refresh_alt_ref_frame) {
1160     // If this is an alt ref frame
1161     // Clear down the global segmentation map
1162     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1163     seg->update_map = 0;
1164     seg->update_data = 0;
1165     cpi->static_mb_pct = 0;
1166
1167     // Disable segmentation and individual segment features by default
1168     vp9_disable_segmentation(seg);
1169     vp9_clearall_segfeatures(seg);
1170
1171     // Scan frames from current to arf frame.
1172     // This function re-enables segmentation if appropriate.
1173     vp9_update_mbgraph_stats(cpi);
1174
1175     // If segmentation was enabled set those features needed for the
1176     // arf itself.
1177     if (seg->enabled) {
1178       seg->update_map = 1;
1179       seg->update_data = 1;
1180
1181       qi_delta =
1182           vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
1183       vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
1184       vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1185
1186       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1187       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1188
1189       // Where relevant assume segment data is delta data
1190       seg->abs_delta = SEGMENT_DELTADATA;
1191     }
1192   } else if (seg->enabled) {
1193     // All other frames if segmentation has been enabled
1194
1195     // First normal frame in a valid gf or alt ref group
1196     if (rc->frames_since_golden == 0) {
1197       // Set up segment features for normal frames in an arf group
1198       if (rc->source_alt_ref_active) {
1199         seg->update_map = 0;
1200         seg->update_data = 1;
1201         seg->abs_delta = SEGMENT_DELTADATA;
1202
1203         qi_delta =
1204             vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth);
1205         vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
1206         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1207
1208         vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1209         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1210
1211         // Segment coding disabled for compred testing
1212         if (high_q || (cpi->static_mb_pct == 100)) {
1213           vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1214           vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1215           vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1216         }
1217       } else {
1218         // Disable segmentation and clear down features if alt ref
1219         // is not active for this group
1220
1221         vp9_disable_segmentation(seg);
1222
1223         memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1224
1225         seg->update_map = 0;
1226         seg->update_data = 0;
1227
1228         vp9_clearall_segfeatures(seg);
1229       }
1230     } else if (rc->is_src_frame_alt_ref) {
1231       // Special case where we are coding over the top of a previous
1232       // alt ref frame.
1233       // Segment coding disabled for compred testing
1234
1235       // Enable ref frame features for segment 0 as well
1236       vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
1237       vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1238
1239       // All mbs should use ALTREF_FRAME
1240       vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
1241       vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1242       vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
1243       vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1244
1245       // Skip all MBs if high Q (0,0 mv and skip coeffs)
1246       if (high_q) {
1247         vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
1248         vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1249       }
1250       // Enable data update
1251       seg->update_data = 1;
1252     } else {
1253       // All other frames.
1254
1255       // No updates.. leave things as they are.
1256       seg->update_map = 0;
1257       seg->update_data = 0;
1258     }
1259   }
1260 }
1261 #endif  // !CONFIG_REALTIME_ONLY
1262
1263 static void update_reference_segmentation_map(VP9_COMP *cpi) {
1264   VP9_COMMON *const cm = &cpi->common;
1265   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
1266   uint8_t *cache_ptr = cm->last_frame_seg_map;
1267   int row, col;
1268
1269   for (row = 0; row < cm->mi_rows; row++) {
1270     MODE_INFO **mi_8x8 = mi_8x8_ptr;
1271     uint8_t *cache = cache_ptr;
1272     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
1273       cache[0] = mi_8x8[0]->segment_id;
1274     mi_8x8_ptr += cm->mi_stride;
1275     cache_ptr += cm->mi_cols;
1276   }
1277 }
1278
1279 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
1280   VP9_COMMON *cm = &cpi->common;
1281   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1282
1283   if (!cpi->lookahead)
1284     cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
1285                                         cm->subsampling_x, cm->subsampling_y,
1286 #if CONFIG_VP9_HIGHBITDEPTH
1287                                         cm->use_highbitdepth,
1288 #endif
1289                                         oxcf->lag_in_frames);
1290   if (!cpi->lookahead)
1291     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1292                        "Failed to allocate lag buffers");
1293
1294   // TODO(agrange) Check if ARF is enabled and skip allocation if not.
1295   if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
1296                                cm->subsampling_x, cm->subsampling_y,
1297 #if CONFIG_VP9_HIGHBITDEPTH
1298                                cm->use_highbitdepth,
1299 #endif
1300                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1301                                NULL, NULL, NULL))
1302     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1303                        "Failed to allocate altref buffer");
1304 }
1305
1306 static void alloc_util_frame_buffers(VP9_COMP *cpi) {
1307   VP9_COMMON *const cm = &cpi->common;
1308   if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
1309                                cm->subsampling_x, cm->subsampling_y,
1310 #if CONFIG_VP9_HIGHBITDEPTH
1311                                cm->use_highbitdepth,
1312 #endif
1313                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1314                                NULL, NULL, NULL))
1315     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1316                        "Failed to allocate last frame buffer");
1317
1318   if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
1319                                cm->subsampling_x, cm->subsampling_y,
1320 #if CONFIG_VP9_HIGHBITDEPTH
1321                                cm->use_highbitdepth,
1322 #endif
1323                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1324                                NULL, NULL, NULL))
1325     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1326                        "Failed to allocate scaled source buffer");
1327
1328   // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
1329   // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
1330   // target of 1/4x1/4. number_spatial_layers must be greater than 2.
1331   if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
1332       cpi->svc.number_spatial_layers > 2) {
1333     cpi->svc.scaled_temp_is_alloc = 1;
1334     if (vpx_realloc_frame_buffer(
1335             &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
1336             cm->subsampling_x, cm->subsampling_y,
1337 #if CONFIG_VP9_HIGHBITDEPTH
1338             cm->use_highbitdepth,
1339 #endif
1340             VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
1341       vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
1342                          "Failed to allocate scaled_frame for svc ");
1343   }
1344
1345   if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
1346                                cm->subsampling_x, cm->subsampling_y,
1347 #if CONFIG_VP9_HIGHBITDEPTH
1348                                cm->use_highbitdepth,
1349 #endif
1350                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1351                                NULL, NULL, NULL))
1352     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1353                        "Failed to allocate scaled last source buffer");
1354 #ifdef ENABLE_KF_DENOISE
1355   if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height,
1356                                cm->subsampling_x, cm->subsampling_y,
1357 #if CONFIG_VP9_HIGHBITDEPTH
1358                                cm->use_highbitdepth,
1359 #endif
1360                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1361                                NULL, NULL, NULL))
1362     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1363                        "Failed to allocate unscaled raw source frame buffer");
1364
1365   if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height,
1366                                cm->subsampling_x, cm->subsampling_y,
1367 #if CONFIG_VP9_HIGHBITDEPTH
1368                                cm->use_highbitdepth,
1369 #endif
1370                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1371                                NULL, NULL, NULL))
1372     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1373                        "Failed to allocate scaled raw source frame buffer");
1374 #endif
1375 }
1376
1377 static int alloc_context_buffers_ext(VP9_COMP *cpi) {
1378   VP9_COMMON *cm = &cpi->common;
1379   int mi_size = cm->mi_cols * cm->mi_rows;
1380
1381   cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
1382   if (!cpi->mbmi_ext_base) return 1;
1383
1384   return 0;
1385 }
1386
1387 static void alloc_compressor_data(VP9_COMP *cpi) {
1388   VP9_COMMON *cm = &cpi->common;
1389   int sb_rows;
1390
1391   vp9_alloc_context_buffers(cm, cm->width, cm->height);
1392
1393   alloc_context_buffers_ext(cpi);
1394
1395   vpx_free(cpi->tile_tok[0][0]);
1396
1397   {
1398     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
1399     CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
1400                     vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
1401   }
1402
1403   sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
1404   vpx_free(cpi->tplist[0][0]);
1405   CHECK_MEM_ERROR(
1406       cm, cpi->tplist[0][0],
1407       vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
1408
1409   vp9_setup_pc_tree(&cpi->common, &cpi->td);
1410 }
1411
1412 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
1413   cpi->framerate = framerate < 0.1 ? 30 : framerate;
1414   vp9_rc_update_framerate(cpi);
1415 }
1416
1417 static void set_tile_limits(VP9_COMP *cpi) {
1418   VP9_COMMON *const cm = &cpi->common;
1419
1420   int min_log2_tile_cols, max_log2_tile_cols;
1421   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
1422
1423   cm->log2_tile_cols =
1424       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
1425   cm->log2_tile_rows = cpi->oxcf.tile_rows;
1426
1427   if (cpi->oxcf.target_level == LEVEL_AUTO) {
1428     const int level_tile_cols =
1429         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
1430     if (cm->log2_tile_cols > level_tile_cols) {
1431       cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
1432     }
1433   }
1434 }
1435
1436 static void update_frame_size(VP9_COMP *cpi) {
1437   VP9_COMMON *const cm = &cpi->common;
1438   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
1439
1440   vp9_set_mb_mi(cm, cm->width, cm->height);
1441   vp9_init_context_buffers(cm);
1442   vp9_init_macroblockd(cm, xd, NULL);
1443   cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
1444   memset(cpi->mbmi_ext_base, 0,
1445          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
1446
1447   set_tile_limits(cpi);
1448 }
1449
1450 static void init_buffer_indices(VP9_COMP *cpi) {
1451   int ref_frame;
1452
1453   for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1454     cpi->ref_fb_idx[ref_frame] = ref_frame;
1455
1456   cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
1457   cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
1458   cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
1459 }
1460
1461 static void init_level_constraint(LevelConstraint *lc) {
1462   lc->level_index = -1;
1463   lc->max_cpb_size = INT_MAX;
1464   lc->max_frame_size = INT_MAX;
1465   lc->fail_flag = 0;
1466 }
1467
1468 static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
1469   vpx_clear_system_state();
1470   ls->level_index = level_index;
1471   if (level_index >= 0) {
1472     ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000;
1473   }
1474 }
1475
1476 static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1477   VP9_COMMON *const cm = &cpi->common;
1478
1479   cpi->oxcf = *oxcf;
1480   cpi->framerate = oxcf->init_framerate;
1481   cm->profile = oxcf->profile;
1482   cm->bit_depth = oxcf->bit_depth;
1483 #if CONFIG_VP9_HIGHBITDEPTH
1484   cm->use_highbitdepth = oxcf->use_highbitdepth;
1485 #endif
1486   cm->color_space = oxcf->color_space;
1487   cm->color_range = oxcf->color_range;
1488
1489   cpi->target_level = oxcf->target_level;
1490   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1491   set_level_constraint(&cpi->level_constraint,
1492                        get_level_index(cpi->target_level));
1493
1494   cm->width = oxcf->width;
1495   cm->height = oxcf->height;
1496   alloc_compressor_data(cpi);
1497
1498   cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
1499
1500   // Single thread case: use counts in common.
1501   cpi->td.counts = &cm->counts;
1502
1503   // Spatial scalability.
1504   cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
1505   // Temporal scalability.
1506   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
1507
1508   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
1509       ((cpi->svc.number_temporal_layers > 1 ||
1510         cpi->svc.number_spatial_layers > 1) &&
1511        cpi->oxcf.pass != 1)) {
1512     vp9_init_layer_context(cpi);
1513   }
1514
1515   // change includes all joint functionality
1516   vp9_change_config(cpi, oxcf);
1517
1518   cpi->static_mb_pct = 0;
1519   cpi->ref_frame_flags = 0;
1520
1521   init_buffer_indices(cpi);
1522
1523   vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
1524 }
1525
1526 static void set_rc_buffer_sizes(RATE_CONTROL *rc,
1527                                 const VP9EncoderConfig *oxcf) {
1528   const int64_t bandwidth = oxcf->target_bandwidth;
1529   const int64_t starting = oxcf->starting_buffer_level_ms;
1530   const int64_t optimal = oxcf->optimal_buffer_level_ms;
1531   const int64_t maximum = oxcf->maximum_buffer_size_ms;
1532
1533   rc->starting_buffer_level = starting * bandwidth / 1000;
1534   rc->optimal_buffer_level =
1535       (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
1536   rc->maximum_buffer_size =
1537       (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
1538 }
1539
1540 #if CONFIG_VP9_HIGHBITDEPTH
1541 // TODO(angiebird): make sdx8f available for highbitdepth if needed
1542 #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
1543   cpi->fn_ptr[BT].sdf = SDF;                             \
1544   cpi->fn_ptr[BT].sdaf = SDAF;                           \
1545   cpi->fn_ptr[BT].vf = VF;                               \
1546   cpi->fn_ptr[BT].svf = SVF;                             \
1547   cpi->fn_ptr[BT].svaf = SVAF;                           \
1548   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
1549   cpi->fn_ptr[BT].sdx8f = NULL;
1550
1551 #define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
1552   static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
1553                                      int source_stride,                        \
1554                                      const uint8_t *ref_ptr, int ref_stride) { \
1555     return fnname(src_ptr, source_stride, ref_ptr, ref_stride);                \
1556   }                                                                            \
1557   static unsigned int fnname##_bits10(                                         \
1558       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1559       int ref_stride) {                                                        \
1560     return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;           \
1561   }                                                                            \
1562   static unsigned int fnname##_bits12(                                         \
1563       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1564       int ref_stride) {                                                        \
1565     return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;           \
1566   }
1567
1568 #define MAKE_BFP_SADAVG_WRAPPER(fnname)                                        \
1569   static unsigned int fnname##_bits8(                                          \
1570       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1571       int ref_stride, const uint8_t *second_pred) {                            \
1572     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred);   \
1573   }                                                                            \
1574   static unsigned int fnname##_bits10(                                         \
1575       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1576       int ref_stride, const uint8_t *second_pred) {                            \
1577     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
1578            2;                                                                  \
1579   }                                                                            \
1580   static unsigned int fnname##_bits12(                                         \
1581       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1582       int ref_stride, const uint8_t *second_pred) {                            \
1583     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
1584            4;                                                                  \
1585   }
1586
1587 #define MAKE_BFP_SAD4D_WRAPPER(fnname)                                        \
1588   static void fnname##_bits8(const uint8_t *src_ptr, int source_stride,       \
1589                              const uint8_t *const ref_ptr[], int ref_stride,  \
1590                              unsigned int *sad_array) {                       \
1591     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1592   }                                                                           \
1593   static void fnname##_bits10(const uint8_t *src_ptr, int source_stride,      \
1594                               const uint8_t *const ref_ptr[], int ref_stride, \
1595                               unsigned int *sad_array) {                      \
1596     int i;                                                                    \
1597     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1598     for (i = 0; i < 4; i++) sad_array[i] >>= 2;                               \
1599   }                                                                           \
1600   static void fnname##_bits12(const uint8_t *src_ptr, int source_stride,      \
1601                               const uint8_t *const ref_ptr[], int ref_stride, \
1602                               unsigned int *sad_array) {                      \
1603     int i;                                                                    \
1604     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1605     for (i = 0; i < 4; i++) sad_array[i] >>= 4;                               \
1606   }
1607
1608 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
1609 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
1610 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
1611 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
1612 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
1613 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
1614 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
1615 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
1616 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
1617 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
1618 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
1619 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
1620 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
1621 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
1622 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
1623 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
1624 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
1625 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
1626 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
1627 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
1628 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
1629 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
1630 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
1631 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
1632 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
1633 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
1634 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
1635 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
1636 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
1637 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
1638 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
1639 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
1640 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
1641 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
1642 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
1643 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
1644 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
1645 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
1646 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
1647
1648 static void highbd_set_var_fns(VP9_COMP *const cpi) {
1649   VP9_COMMON *const cm = &cpi->common;
1650   if (cm->use_highbitdepth) {
1651     switch (cm->bit_depth) {
1652       case VPX_BITS_8:
1653         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
1654                    vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
1655                    vpx_highbd_8_sub_pixel_variance32x16,
1656                    vpx_highbd_8_sub_pixel_avg_variance32x16,
1657                    vpx_highbd_sad32x16x4d_bits8)
1658
1659         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
1660                    vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
1661                    vpx_highbd_8_sub_pixel_variance16x32,
1662                    vpx_highbd_8_sub_pixel_avg_variance16x32,
1663                    vpx_highbd_sad16x32x4d_bits8)
1664
1665         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
1666                    vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
1667                    vpx_highbd_8_sub_pixel_variance64x32,
1668                    vpx_highbd_8_sub_pixel_avg_variance64x32,
1669                    vpx_highbd_sad64x32x4d_bits8)
1670
1671         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
1672                    vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
1673                    vpx_highbd_8_sub_pixel_variance32x64,
1674                    vpx_highbd_8_sub_pixel_avg_variance32x64,
1675                    vpx_highbd_sad32x64x4d_bits8)
1676
1677         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
1678                    vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
1679                    vpx_highbd_8_sub_pixel_variance32x32,
1680                    vpx_highbd_8_sub_pixel_avg_variance32x32,
1681                    vpx_highbd_sad32x32x4d_bits8)
1682
1683         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
1684                    vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
1685                    vpx_highbd_8_sub_pixel_variance64x64,
1686                    vpx_highbd_8_sub_pixel_avg_variance64x64,
1687                    vpx_highbd_sad64x64x4d_bits8)
1688
1689         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
1690                    vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
1691                    vpx_highbd_8_sub_pixel_variance16x16,
1692                    vpx_highbd_8_sub_pixel_avg_variance16x16,
1693                    vpx_highbd_sad16x16x4d_bits8)
1694
1695         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
1696                    vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
1697                    vpx_highbd_8_sub_pixel_variance16x8,
1698                    vpx_highbd_8_sub_pixel_avg_variance16x8,
1699                    vpx_highbd_sad16x8x4d_bits8)
1700
1701         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
1702                    vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
1703                    vpx_highbd_8_sub_pixel_variance8x16,
1704                    vpx_highbd_8_sub_pixel_avg_variance8x16,
1705                    vpx_highbd_sad8x16x4d_bits8)
1706
1707         HIGHBD_BFP(
1708             BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
1709             vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
1710             vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
1711
1712         HIGHBD_BFP(
1713             BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
1714             vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
1715             vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
1716
1717         HIGHBD_BFP(
1718             BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
1719             vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
1720             vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
1721
1722         HIGHBD_BFP(
1723             BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
1724             vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
1725             vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
1726         break;
1727
1728       case VPX_BITS_10:
1729         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
1730                    vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
1731                    vpx_highbd_10_sub_pixel_variance32x16,
1732                    vpx_highbd_10_sub_pixel_avg_variance32x16,
1733                    vpx_highbd_sad32x16x4d_bits10)
1734
1735         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
1736                    vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
1737                    vpx_highbd_10_sub_pixel_variance16x32,
1738                    vpx_highbd_10_sub_pixel_avg_variance16x32,
1739                    vpx_highbd_sad16x32x4d_bits10)
1740
1741         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
1742                    vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
1743                    vpx_highbd_10_sub_pixel_variance64x32,
1744                    vpx_highbd_10_sub_pixel_avg_variance64x32,
1745                    vpx_highbd_sad64x32x4d_bits10)
1746
1747         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
1748                    vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
1749                    vpx_highbd_10_sub_pixel_variance32x64,
1750                    vpx_highbd_10_sub_pixel_avg_variance32x64,
1751                    vpx_highbd_sad32x64x4d_bits10)
1752
1753         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
1754                    vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
1755                    vpx_highbd_10_sub_pixel_variance32x32,
1756                    vpx_highbd_10_sub_pixel_avg_variance32x32,
1757                    vpx_highbd_sad32x32x4d_bits10)
1758
1759         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
1760                    vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
1761                    vpx_highbd_10_sub_pixel_variance64x64,
1762                    vpx_highbd_10_sub_pixel_avg_variance64x64,
1763                    vpx_highbd_sad64x64x4d_bits10)
1764
1765         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
1766                    vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
1767                    vpx_highbd_10_sub_pixel_variance16x16,
1768                    vpx_highbd_10_sub_pixel_avg_variance16x16,
1769                    vpx_highbd_sad16x16x4d_bits10)
1770
1771         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
1772                    vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
1773                    vpx_highbd_10_sub_pixel_variance16x8,
1774                    vpx_highbd_10_sub_pixel_avg_variance16x8,
1775                    vpx_highbd_sad16x8x4d_bits10)
1776
1777         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
1778                    vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
1779                    vpx_highbd_10_sub_pixel_variance8x16,
1780                    vpx_highbd_10_sub_pixel_avg_variance8x16,
1781                    vpx_highbd_sad8x16x4d_bits10)
1782
1783         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
1784                    vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
1785                    vpx_highbd_10_sub_pixel_variance8x8,
1786                    vpx_highbd_10_sub_pixel_avg_variance8x8,
1787                    vpx_highbd_sad8x8x4d_bits10)
1788
1789         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
1790                    vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
1791                    vpx_highbd_10_sub_pixel_variance8x4,
1792                    vpx_highbd_10_sub_pixel_avg_variance8x4,
1793                    vpx_highbd_sad8x4x4d_bits10)
1794
1795         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
1796                    vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
1797                    vpx_highbd_10_sub_pixel_variance4x8,
1798                    vpx_highbd_10_sub_pixel_avg_variance4x8,
1799                    vpx_highbd_sad4x8x4d_bits10)
1800
1801         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
1802                    vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
1803                    vpx_highbd_10_sub_pixel_variance4x4,
1804                    vpx_highbd_10_sub_pixel_avg_variance4x4,
1805                    vpx_highbd_sad4x4x4d_bits10)
1806         break;
1807
1808       default:
1809         assert(cm->bit_depth == VPX_BITS_12);
1810         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
1811                    vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
1812                    vpx_highbd_12_sub_pixel_variance32x16,
1813                    vpx_highbd_12_sub_pixel_avg_variance32x16,
1814                    vpx_highbd_sad32x16x4d_bits12)
1815
1816         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
1817                    vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
1818                    vpx_highbd_12_sub_pixel_variance16x32,
1819                    vpx_highbd_12_sub_pixel_avg_variance16x32,
1820                    vpx_highbd_sad16x32x4d_bits12)
1821
1822         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
1823                    vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
1824                    vpx_highbd_12_sub_pixel_variance64x32,
1825                    vpx_highbd_12_sub_pixel_avg_variance64x32,
1826                    vpx_highbd_sad64x32x4d_bits12)
1827
1828         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
1829                    vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
1830                    vpx_highbd_12_sub_pixel_variance32x64,
1831                    vpx_highbd_12_sub_pixel_avg_variance32x64,
1832                    vpx_highbd_sad32x64x4d_bits12)
1833
1834         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
1835                    vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
1836                    vpx_highbd_12_sub_pixel_variance32x32,
1837                    vpx_highbd_12_sub_pixel_avg_variance32x32,
1838                    vpx_highbd_sad32x32x4d_bits12)
1839
1840         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
1841                    vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
1842                    vpx_highbd_12_sub_pixel_variance64x64,
1843                    vpx_highbd_12_sub_pixel_avg_variance64x64,
1844                    vpx_highbd_sad64x64x4d_bits12)
1845
1846         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
1847                    vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
1848                    vpx_highbd_12_sub_pixel_variance16x16,
1849                    vpx_highbd_12_sub_pixel_avg_variance16x16,
1850                    vpx_highbd_sad16x16x4d_bits12)
1851
1852         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
1853                    vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
1854                    vpx_highbd_12_sub_pixel_variance16x8,
1855                    vpx_highbd_12_sub_pixel_avg_variance16x8,
1856                    vpx_highbd_sad16x8x4d_bits12)
1857
1858         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
1859                    vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
1860                    vpx_highbd_12_sub_pixel_variance8x16,
1861                    vpx_highbd_12_sub_pixel_avg_variance8x16,
1862                    vpx_highbd_sad8x16x4d_bits12)
1863
1864         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
1865                    vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
1866                    vpx_highbd_12_sub_pixel_variance8x8,
1867                    vpx_highbd_12_sub_pixel_avg_variance8x8,
1868                    vpx_highbd_sad8x8x4d_bits12)
1869
1870         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
1871                    vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
1872                    vpx_highbd_12_sub_pixel_variance8x4,
1873                    vpx_highbd_12_sub_pixel_avg_variance8x4,
1874                    vpx_highbd_sad8x4x4d_bits12)
1875
1876         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
1877                    vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
1878                    vpx_highbd_12_sub_pixel_variance4x8,
1879                    vpx_highbd_12_sub_pixel_avg_variance4x8,
1880                    vpx_highbd_sad4x8x4d_bits12)
1881
1882         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
1883                    vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
1884                    vpx_highbd_12_sub_pixel_variance4x4,
1885                    vpx_highbd_12_sub_pixel_avg_variance4x4,
1886                    vpx_highbd_sad4x4x4d_bits12)
1887         break;
1888     }
1889   }
1890 }
1891 #endif  // CONFIG_VP9_HIGHBITDEPTH
1892
1893 static void realloc_segmentation_maps(VP9_COMP *cpi) {
1894   VP9_COMMON *const cm = &cpi->common;
1895
1896   // Create the encoder segmentation map and set all entries to 0
1897   vpx_free(cpi->segmentation_map);
1898   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
1899                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1900
1901   // Create a map used for cyclic background refresh.
1902   if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1903   CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
1904                   vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
1905
1906   // Create a map used to mark inactive areas.
1907   vpx_free(cpi->active_map.map);
1908   CHECK_MEM_ERROR(cm, cpi->active_map.map,
1909                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1910
1911   // And a place holder structure is the coding context
1912   // for use if we want to save and restore it
1913   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
1914   CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
1915                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1916 }
1917
1918 static void alloc_copy_partition_data(VP9_COMP *cpi) {
1919   VP9_COMMON *const cm = &cpi->common;
1920   if (cpi->prev_partition == NULL) {
1921     CHECK_MEM_ERROR(cm, cpi->prev_partition,
1922                     (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
1923                                              sizeof(*cpi->prev_partition)));
1924   }
1925   if (cpi->prev_segment_id == NULL) {
1926     CHECK_MEM_ERROR(
1927         cm, cpi->prev_segment_id,
1928         (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1929                              sizeof(*cpi->prev_segment_id)));
1930   }
1931   if (cpi->prev_variance_low == NULL) {
1932     CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
1933                     (uint8_t *)vpx_calloc(
1934                         (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
1935                         sizeof(*cpi->prev_variance_low)));
1936   }
1937   if (cpi->copied_frame_cnt == NULL) {
1938     CHECK_MEM_ERROR(
1939         cm, cpi->copied_frame_cnt,
1940         (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1941                               sizeof(*cpi->copied_frame_cnt)));
1942   }
1943 }
1944
1945 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1946   VP9_COMMON *const cm = &cpi->common;
1947   RATE_CONTROL *const rc = &cpi->rc;
1948   int last_w = cpi->oxcf.width;
1949   int last_h = cpi->oxcf.height;
1950
1951   vp9_init_quantizer(cpi);
1952   if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
1953   cm->bit_depth = oxcf->bit_depth;
1954   cm->color_space = oxcf->color_space;
1955   cm->color_range = oxcf->color_range;
1956
1957   cpi->target_level = oxcf->target_level;
1958   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1959   set_level_constraint(&cpi->level_constraint,
1960                        get_level_index(cpi->target_level));
1961
1962   if (cm->profile <= PROFILE_1)
1963     assert(cm->bit_depth == VPX_BITS_8);
1964   else
1965     assert(cm->bit_depth > VPX_BITS_8);
1966
1967   cpi->oxcf = *oxcf;
1968 #if CONFIG_VP9_HIGHBITDEPTH
1969   cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
1970 #endif  // CONFIG_VP9_HIGHBITDEPTH
1971
1972   if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
1973     rc->baseline_gf_interval = FIXED_GF_INTERVAL;
1974   } else {
1975     rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
1976   }
1977
1978   cpi->refresh_golden_frame = 0;
1979   cpi->refresh_last_frame = 1;
1980   cm->refresh_frame_context = 1;
1981   cm->reset_frame_context = 0;
1982
1983   vp9_reset_segment_features(&cm->seg);
1984   vp9_set_high_precision_mv(cpi, 0);
1985
1986   {
1987     int i;
1988
1989     for (i = 0; i < MAX_SEGMENTS; i++)
1990       cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
1991   }
1992   cpi->encode_breakout = cpi->oxcf.encode_breakout;
1993
1994   set_rc_buffer_sizes(rc, &cpi->oxcf);
1995
1996   // Under a configuration change, where maximum_buffer_size may change,
1997   // keep buffer level clipped to the maximum allowed buffer size.
1998   rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
1999   rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
2000
2001   // Set up frame rate and related parameters rate control values.
2002   vp9_new_framerate(cpi, cpi->framerate);
2003
2004   // Set absolute upper and lower quality limits
2005   rc->worst_quality = cpi->oxcf.worst_allowed_q;
2006   rc->best_quality = cpi->oxcf.best_allowed_q;
2007
2008   cm->interp_filter = cpi->sf.default_interp_filter;
2009
2010   if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
2011     cm->render_width = cpi->oxcf.render_width;
2012     cm->render_height = cpi->oxcf.render_height;
2013   } else {
2014     cm->render_width = cpi->oxcf.width;
2015     cm->render_height = cpi->oxcf.height;
2016   }
2017   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2018     cm->width = cpi->oxcf.width;
2019     cm->height = cpi->oxcf.height;
2020     cpi->external_resize = 1;
2021   }
2022
2023   if (cpi->initial_width) {
2024     int new_mi_size = 0;
2025     vp9_set_mb_mi(cm, cm->width, cm->height);
2026     new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
2027     if (cm->mi_alloc_size < new_mi_size) {
2028       vp9_free_context_buffers(cm);
2029       alloc_compressor_data(cpi);
2030       realloc_segmentation_maps(cpi);
2031       cpi->initial_width = cpi->initial_height = 0;
2032       cpi->external_resize = 0;
2033     } else if (cm->mi_alloc_size == new_mi_size &&
2034                (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
2035       vp9_alloc_loop_filter(cm);
2036     }
2037   }
2038
2039   if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width ||
2040       last_h != cpi->oxcf.height)
2041     update_frame_size(cpi);
2042
2043   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2044     memset(cpi->consec_zero_mv, 0,
2045            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
2046     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
2047       vp9_cyclic_refresh_reset_resize(cpi);
2048     rc->rc_1_frame = 0;
2049     rc->rc_2_frame = 0;
2050   }
2051
2052   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
2053       ((cpi->svc.number_temporal_layers > 1 ||
2054         cpi->svc.number_spatial_layers > 1) &&
2055        cpi->oxcf.pass != 1)) {
2056     vp9_update_layer_context_change_config(cpi,
2057                                            (int)cpi->oxcf.target_bandwidth);
2058   }
2059
2060   // Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
2061   // configuration change has a large change in avg_frame_bandwidth.
2062   // For SVC check for resetting based on spatial layer average bandwidth.
2063   // Also reset buffer level to optimal level.
2064   if (cm->current_video_frame > (unsigned int)cpi->svc.number_spatial_layers) {
2065     if (cpi->use_svc) {
2066       vp9_svc_check_reset_layer_rc_flag(cpi);
2067     } else {
2068       if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
2069           rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
2070         rc->rc_1_frame = 0;
2071         rc->rc_2_frame = 0;
2072         rc->bits_off_target = rc->optimal_buffer_level;
2073         rc->buffer_level = rc->optimal_buffer_level;
2074       }
2075     }
2076   }
2077
2078   cpi->alt_ref_source = NULL;
2079   rc->is_src_frame_alt_ref = 0;
2080
2081 #if 0
2082   // Experimental RD Code
2083   cpi->frame_distortion = 0;
2084   cpi->last_frame_distortion = 0;
2085 #endif
2086
2087   set_tile_limits(cpi);
2088
2089   cpi->ext_refresh_frame_flags_pending = 0;
2090   cpi->ext_refresh_frame_context_pending = 0;
2091
2092 #if CONFIG_VP9_HIGHBITDEPTH
2093   highbd_set_var_fns(cpi);
2094 #endif
2095
2096   vp9_set_row_mt(cpi);
2097 }
2098
2099 #ifndef M_LOG2_E
2100 #define M_LOG2_E 0.693147180559945309417
2101 #endif
2102 #define log2f(x) (log(x) / (float)M_LOG2_E)
2103
2104 /***********************************************************************
2105  * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts'    *
2106  ***********************************************************************
2107  * The following 2 functions ('cal_nmvjointsadcost' and                *
2108  * 'cal_nmvsadcosts') are used to calculate cost lookup tables         *
2109  * used by 'vp9_diamond_search_sad'. The C implementation of the       *
2110  * function is generic, but the AVX intrinsics optimised version       *
2111  * relies on the following properties of the computed tables:          *
2112  * For cal_nmvjointsadcost:                                            *
2113  *   - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]     *
2114  * For cal_nmvsadcosts:                                                *
2115  *   - For all i: mvsadcost[0][i] == mvsadcost[1][i]                   *
2116  *         (Equal costs for both components)                           *
2117  *   - For all i: mvsadcost[0][i] == mvsadcost[0][-i]                  *
2118  *         (Cost function is even)                                     *
2119  * If these do not hold, then the AVX optimised version of the         *
2120  * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
2121  * case you can revert to using the C function instead.                *
2122  ***********************************************************************/
2123
2124 static void cal_nmvjointsadcost(int *mvjointsadcost) {
2125   /*********************************************************************
2126    * Warning: Read the comments above before modifying this function   *
2127    *********************************************************************/
2128   mvjointsadcost[0] = 600;
2129   mvjointsadcost[1] = 300;
2130   mvjointsadcost[2] = 300;
2131   mvjointsadcost[3] = 300;
2132 }
2133
2134 static void cal_nmvsadcosts(int *mvsadcost[2]) {
2135   /*********************************************************************
2136    * Warning: Read the comments above before modifying this function   *
2137    *********************************************************************/
2138   int i = 1;
2139
2140   mvsadcost[0][0] = 0;
2141   mvsadcost[1][0] = 0;
2142
2143   do {
2144     double z = 256 * (2 * (log2f(8 * i) + .6));
2145     mvsadcost[0][i] = (int)z;
2146     mvsadcost[1][i] = (int)z;
2147     mvsadcost[0][-i] = (int)z;
2148     mvsadcost[1][-i] = (int)z;
2149   } while (++i <= MV_MAX);
2150 }
2151
2152 static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
2153   int i = 1;
2154
2155   mvsadcost[0][0] = 0;
2156   mvsadcost[1][0] = 0;
2157
2158   do {
2159     double z = 256 * (2 * (log2f(8 * i) + .6));
2160     mvsadcost[0][i] = (int)z;
2161     mvsadcost[1][i] = (int)z;
2162     mvsadcost[0][-i] = (int)z;
2163     mvsadcost[1][-i] = (int)z;
2164   } while (++i <= MV_MAX);
2165 }
2166
2167 static void init_ref_frame_bufs(VP9_COMMON *cm) {
2168   int i;
2169   BufferPool *const pool = cm->buffer_pool;
2170   cm->new_fb_idx = INVALID_IDX;
2171   for (i = 0; i < REF_FRAMES; ++i) {
2172     cm->ref_frame_map[i] = INVALID_IDX;
2173   }
2174   for (i = 0; i < FRAME_BUFFERS; ++i) {
2175     pool->frame_bufs[i].ref_count = 0;
2176   }
2177 }
2178
2179 static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
2180                                  int subsampling_x, int subsampling_y) {
2181   VP9_COMMON *const cm = &cpi->common;
2182 #if !CONFIG_VP9_HIGHBITDEPTH
2183   (void)use_highbitdepth;
2184   assert(use_highbitdepth == 0);
2185 #endif
2186
2187   if (!cpi->initial_width ||
2188 #if CONFIG_VP9_HIGHBITDEPTH
2189       cm->use_highbitdepth != use_highbitdepth ||
2190 #endif
2191       cm->subsampling_x != subsampling_x ||
2192       cm->subsampling_y != subsampling_y) {
2193     cm->subsampling_x = subsampling_x;
2194     cm->subsampling_y = subsampling_y;
2195 #if CONFIG_VP9_HIGHBITDEPTH
2196     cm->use_highbitdepth = use_highbitdepth;
2197 #endif
2198     alloc_util_frame_buffers(cpi);
2199     cpi->initial_width = cm->width;
2200     cpi->initial_height = cm->height;
2201     cpi->initial_mbs = cm->MBs;
2202   }
2203 }
2204
2205 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2206 static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
2207                                               unsigned int *subsampling_x,
2208                                               unsigned int *subsampling_y) {
2209   switch (fmt) {
2210     case VPX_IMG_FMT_I420:
2211     case VPX_IMG_FMT_YV12:
2212     case VPX_IMG_FMT_I422:
2213     case VPX_IMG_FMT_I42016:
2214     case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
2215     default: *subsampling_x = 0; break;
2216   }
2217
2218   switch (fmt) {
2219     case VPX_IMG_FMT_I420:
2220     case VPX_IMG_FMT_I440:
2221     case VPX_IMG_FMT_YV12:
2222     case VPX_IMG_FMT_I42016:
2223     case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
2224     default: *subsampling_y = 0; break;
2225   }
2226 }
2227
2228 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2229 static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
2230   return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
2231 }
2232
2233 #if CONFIG_VP9_TEMPORAL_DENOISING
2234 static void setup_denoiser_buffer(VP9_COMP *cpi) {
2235   VP9_COMMON *const cm = &cpi->common;
2236   if (cpi->oxcf.noise_sensitivity > 0 &&
2237       !cpi->denoiser.frame_buffer_initialized) {
2238     if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
2239                            cpi->oxcf.noise_sensitivity, cm->width, cm->height,
2240                            cm->subsampling_x, cm->subsampling_y,
2241 #if CONFIG_VP9_HIGHBITDEPTH
2242                            cm->use_highbitdepth,
2243 #endif
2244                            VP9_ENC_BORDER_IN_PIXELS))
2245       vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
2246                          "Failed to allocate denoiser");
2247   }
2248 }
2249 #endif
2250
2251 void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
2252   const VP9EncoderConfig *oxcf = &cpi->oxcf;
2253   unsigned int subsampling_x, subsampling_y;
2254   const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
2255   vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
2256
2257   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
2258 #if CONFIG_VP9_TEMPORAL_DENOISING
2259   setup_denoiser_buffer(cpi);
2260 #endif
2261
2262   assert(cpi->lookahead == NULL);
2263   cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
2264                                       subsampling_y,
2265 #if CONFIG_VP9_HIGHBITDEPTH
2266                                       use_highbitdepth,
2267 #endif
2268                                       oxcf->lag_in_frames);
2269   alloc_raw_frame_buffers(cpi);
2270 }
2271
2272 VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
2273                                 BufferPool *const pool) {
2274   unsigned int i;
2275   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
2276   VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
2277
2278   if (!cm) return NULL;
2279
2280   vp9_zero(*cpi);
2281
2282   if (setjmp(cm->error.jmp)) {
2283     cm->error.setjmp = 0;
2284     vp9_remove_compressor(cpi);
2285     return 0;
2286   }
2287
2288   cm->error.setjmp = 1;
2289   cm->alloc_mi = vp9_enc_alloc_mi;
2290   cm->free_mi = vp9_enc_free_mi;
2291   cm->setup_mi = vp9_enc_setup_mi;
2292
2293   CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
2294   CHECK_MEM_ERROR(
2295       cm, cm->frame_contexts,
2296       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
2297
2298   cpi->use_svc = 0;
2299   cpi->resize_state = ORIG;
2300   cpi->external_resize = 0;
2301   cpi->resize_avg_qp = 0;
2302   cpi->resize_buffer_underflow = 0;
2303   cpi->use_skin_detection = 0;
2304   cpi->common.buffer_pool = pool;
2305   init_ref_frame_bufs(cm);
2306
2307   cpi->force_update_segmentation = 0;
2308
2309   init_config(cpi, oxcf);
2310   cpi->frame_info = vp9_get_frame_info(oxcf);
2311
2312   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
2313
2314   init_frame_indexes(cm);
2315   cpi->partition_search_skippable_frame = 0;
2316   cpi->tile_data = NULL;
2317
2318   realloc_segmentation_maps(cpi);
2319
2320   CHECK_MEM_ERROR(
2321       cm, cpi->skin_map,
2322       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
2323
2324 #if !CONFIG_REALTIME_ONLY
2325   CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
2326 #endif
2327
2328   CHECK_MEM_ERROR(
2329       cm, cpi->consec_zero_mv,
2330       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
2331
2332   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
2333                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
2334   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
2335                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
2336   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
2337                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
2338   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
2339                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
2340   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
2341                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
2342   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
2343                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
2344   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
2345                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
2346   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
2347                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
2348
2349   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
2350        i++) {
2351     CHECK_MEM_ERROR(
2352         cm, cpi->mbgraph_stats[i].mb_stats,
2353         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
2354   }
2355
2356 #if CONFIG_FP_MB_STATS
2357   cpi->use_fp_mb_stats = 0;
2358   if (cpi->use_fp_mb_stats) {
2359     // a place holder used to store the first pass mb stats in the first pass
2360     CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
2361                     vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
2362   } else {
2363     cpi->twopass.frame_mb_stats_buf = NULL;
2364   }
2365 #endif
2366
2367   cpi->refresh_alt_ref_frame = 0;
2368   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
2369
2370   init_level_info(&cpi->level_info);
2371   init_level_constraint(&cpi->level_constraint);
2372
2373 #if CONFIG_INTERNAL_STATS
2374   cpi->b_calculate_blockiness = 1;
2375   cpi->b_calculate_consistency = 1;
2376   cpi->total_inconsistency = 0;
2377   cpi->psnr.worst = 100.0;
2378   cpi->worst_ssim = 100.0;
2379
2380   cpi->count = 0;
2381   cpi->bytes = 0;
2382
2383   if (cpi->b_calculate_psnr) {
2384     cpi->total_sq_error = 0;
2385     cpi->total_samples = 0;
2386
2387     cpi->totalp_sq_error = 0;
2388     cpi->totalp_samples = 0;
2389
2390     cpi->tot_recode_hits = 0;
2391     cpi->summed_quality = 0;
2392     cpi->summed_weights = 0;
2393     cpi->summedp_quality = 0;
2394     cpi->summedp_weights = 0;
2395   }
2396
2397   cpi->fastssim.worst = 100.0;
2398
2399   cpi->psnrhvs.worst = 100.0;
2400
2401   if (cpi->b_calculate_blockiness) {
2402     cpi->total_blockiness = 0;
2403     cpi->worst_blockiness = 0.0;
2404   }
2405
2406   if (cpi->b_calculate_consistency) {
2407     CHECK_MEM_ERROR(cm, cpi->ssim_vars,
2408                     vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
2409                                sizeof(*cpi->ssim_vars) * 4));
2410     cpi->worst_consistency = 100.0;
2411   } else {
2412     cpi->ssim_vars = NULL;
2413   }
2414
2415 #endif
2416
2417   cpi->first_time_stamp_ever = INT64_MAX;
2418
2419   /*********************************************************************
2420    * Warning: Read the comments around 'cal_nmvjointsadcost' and       *
2421    * 'cal_nmvsadcosts' before modifying how these tables are computed. *
2422    *********************************************************************/
2423   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
2424   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
2425   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
2426   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
2427   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
2428   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
2429
2430   cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
2431   cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
2432   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
2433   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
2434   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
2435
2436 #if CONFIG_VP9_TEMPORAL_DENOISING
2437 #ifdef OUTPUT_YUV_DENOISED
2438   yuv_denoised_file = fopen("denoised.yuv", "ab");
2439 #endif
2440 #endif
2441 #ifdef OUTPUT_YUV_SKINMAP
2442   yuv_skinmap_file = fopen("skinmap.yuv", "wb");
2443 #endif
2444 #ifdef OUTPUT_YUV_REC
2445   yuv_rec_file = fopen("rec.yuv", "wb");
2446 #endif
2447 #ifdef OUTPUT_YUV_SVC_SRC
2448   yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
2449   yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
2450   yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
2451 #endif
2452
2453 #if 0
2454   framepsnr = fopen("framepsnr.stt", "a");
2455   kf_list = fopen("kf_list.stt", "w");
2456 #endif
2457
2458   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
2459
2460 #if !CONFIG_REALTIME_ONLY
2461   if (oxcf->pass == 1) {
2462     vp9_init_first_pass(cpi);
2463   } else if (oxcf->pass == 2) {
2464     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
2465     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
2466
2467     if (cpi->svc.number_spatial_layers > 1 ||
2468         cpi->svc.number_temporal_layers > 1) {
2469       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
2470       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 };
2471       int i;
2472
2473       for (i = 0; i < oxcf->ss_number_layers; ++i) {
2474         FIRSTPASS_STATS *const last_packet_for_layer =
2475             &stats[packets - oxcf->ss_number_layers + i];
2476         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
2477         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
2478         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
2479           int num_frames;
2480           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
2481
2482           vpx_free(lc->rc_twopass_stats_in.buf);
2483
2484           lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
2485           CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
2486                           vpx_malloc(lc->rc_twopass_stats_in.sz));
2487           lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
2488           lc->twopass.stats_in = lc->twopass.stats_in_start;
2489           lc->twopass.stats_in_end =
2490               lc->twopass.stats_in_start + packets_in_layer - 1;
2491           // Note the last packet is cumulative first pass stats.
2492           // So the number of frames is packet number minus one
2493           num_frames = packets_in_layer - 1;
2494           fps_init_first_pass_info(&lc->twopass.first_pass_info,
2495                                    lc->rc_twopass_stats_in.buf, num_frames);
2496           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
2497         }
2498       }
2499
2500       for (i = 0; i < packets; ++i) {
2501         const int layer_id = (int)stats[i].spatial_layer_id;
2502         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers &&
2503             stats_copy[layer_id] != NULL) {
2504           *stats_copy[layer_id] = stats[i];
2505           ++stats_copy[layer_id];
2506         }
2507       }
2508
2509       vp9_init_second_pass_spatial_svc(cpi);
2510     } else {
2511       int num_frames;
2512 #if CONFIG_FP_MB_STATS
2513       if (cpi->use_fp_mb_stats) {
2514         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
2515         const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
2516
2517         cpi->twopass.firstpass_mb_stats.mb_stats_start =
2518             oxcf->firstpass_mb_stats_in.buf;
2519         cpi->twopass.firstpass_mb_stats.mb_stats_end =
2520             cpi->twopass.firstpass_mb_stats.mb_stats_start +
2521             (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
2522       }
2523 #endif
2524
2525       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
2526       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
2527       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
2528       // Note the last packet is cumulative first pass stats.
2529       // So the number of frames is packet number minus one
2530       num_frames = packets - 1;
2531       fps_init_first_pass_info(&cpi->twopass.first_pass_info,
2532                                oxcf->two_pass_stats_in.buf, num_frames);
2533
2534       vp9_init_second_pass(cpi);
2535     }
2536   }
2537 #endif  // !CONFIG_REALTIME_ONLY
2538
2539   cpi->mb_wiener_var_cols = 0;
2540   cpi->mb_wiener_var_rows = 0;
2541   cpi->mb_wiener_variance = NULL;
2542
2543   vp9_set_speed_features_framesize_independent(cpi, oxcf->speed);
2544   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
2545
2546   {
2547     const int bsize = BLOCK_16X16;
2548     const int w = num_8x8_blocks_wide_lookup[bsize];
2549     const int h = num_8x8_blocks_high_lookup[bsize];
2550     const int num_cols = (cm->mi_cols + w - 1) / w;
2551     const int num_rows = (cm->mi_rows + h - 1) / h;
2552     CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
2553                     vpx_calloc(num_rows * num_cols,
2554                                sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
2555   }
2556
2557   cpi->kmeans_data_arr_alloc = 0;
2558 #if CONFIG_NON_GREEDY_MV
2559   cpi->tpl_ready = 0;
2560 #endif  // CONFIG_NON_GREEDY_MV
2561   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
2562
2563   // Allocate memory to store variances for a frame.
2564   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
2565   cpi->source_var_thresh = 0;
2566   cpi->frames_till_next_var_check = 0;
2567 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
2568   cpi->fn_ptr[BT].sdf = SDF;                             \
2569   cpi->fn_ptr[BT].sdaf = SDAF;                           \
2570   cpi->fn_ptr[BT].vf = VF;                               \
2571   cpi->fn_ptr[BT].svf = SVF;                             \
2572   cpi->fn_ptr[BT].svaf = SVAF;                           \
2573   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
2574   cpi->fn_ptr[BT].sdx8f = SDX8F;
2575
2576   // TODO(angiebird): make sdx8f available for every block size
2577   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
2578       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
2579       vpx_sad32x16x4d, NULL)
2580
2581   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
2582       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
2583       vpx_sad16x32x4d, NULL)
2584
2585   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
2586       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
2587       vpx_sad64x32x4d, NULL)
2588
2589   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
2590       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
2591       vpx_sad32x64x4d, NULL)
2592
2593   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
2594       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
2595       vpx_sad32x32x4d, vpx_sad32x32x8)
2596
2597   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
2598       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
2599       vpx_sad64x64x4d, NULL)
2600
2601   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
2602       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
2603       vpx_sad16x16x4d, vpx_sad16x16x8)
2604
2605   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
2606       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
2607       vpx_sad16x8x4d, vpx_sad16x8x8)
2608
2609   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
2610       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
2611       vpx_sad8x16x4d, vpx_sad8x16x8)
2612
2613   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
2614       vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
2615       vpx_sad8x8x8)
2616
2617   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
2618       vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
2619       NULL)
2620
2621   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
2622       vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
2623       NULL)
2624
2625   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
2626       vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
2627       vpx_sad4x4x8)
2628
2629 #if CONFIG_VP9_HIGHBITDEPTH
2630   highbd_set_var_fns(cpi);
2631 #endif
2632
2633   /* vp9_init_quantizer() is first called here. Add check in
2634    * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
2635    * called later when needed. This will avoid unnecessary calls of
2636    * vp9_init_quantizer() for every frame.
2637    */
2638   vp9_init_quantizer(cpi);
2639
2640   vp9_loop_filter_init(cm);
2641
2642   // Set up the unit scaling factor used during motion search.
2643 #if CONFIG_VP9_HIGHBITDEPTH
2644   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2645                                     cm->width, cm->height,
2646                                     cm->use_highbitdepth);
2647 #else
2648   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2649                                     cm->width, cm->height);
2650 #endif  // CONFIG_VP9_HIGHBITDEPTH
2651   cpi->td.mb.me_sf = &cpi->me_sf;
2652
2653   cm->error.setjmp = 0;
2654
2655 #if CONFIG_RATE_CTRL
2656   encode_command_init(&cpi->encode_command);
2657   partition_info_init(cpi);
2658   motion_vector_info_init(cpi);
2659 #endif
2660
2661   return cpi;
2662 }
2663
2664 #if CONFIG_INTERNAL_STATS
2665 #define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
2666
2667 #define SNPRINT2(H, T, V) \
2668   snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
2669 #endif  // CONFIG_INTERNAL_STATS
2670
2671 static void free_tpl_buffer(VP9_COMP *cpi);
2672
2673 void vp9_remove_compressor(VP9_COMP *cpi) {
2674   VP9_COMMON *cm;
2675   unsigned int i;
2676   int t;
2677
2678   if (!cpi) return;
2679
2680 #if CONFIG_INTERNAL_STATS
2681   vpx_free(cpi->ssim_vars);
2682 #endif
2683
2684   cm = &cpi->common;
2685   if (cm->current_video_frame > 0) {
2686 #if CONFIG_INTERNAL_STATS
2687     vpx_clear_system_state();
2688
2689     if (cpi->oxcf.pass != 1) {
2690       char headings[512] = { 0 };
2691       char results[512] = { 0 };
2692       FILE *f = fopen("opsnr.stt", "a");
2693       double time_encoded =
2694           (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
2695           10000000.000;
2696       double total_encode_time =
2697           (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
2698       const double dr =
2699           (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
2700       const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
2701       const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
2702       const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
2703
2704       if (cpi->b_calculate_psnr) {
2705         const double total_psnr = vpx_sse_to_psnr(
2706             (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
2707         const double totalp_psnr = vpx_sse_to_psnr(
2708             (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error);
2709         const double total_ssim =
2710             100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
2711         const double totalp_ssim =
2712             100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0);
2713
2714         snprintf(headings, sizeof(headings),
2715                  "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
2716                  "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
2717                  "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
2718                  "AVPsnrY\tAPsnrCb\tAPsnrCr");
2719         snprintf(results, sizeof(results),
2720                  "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2721                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2722                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2723                  "%7.3f\t%7.3f\t%7.3f",
2724                  dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
2725                  cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim,
2726                  totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count,
2727                  cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
2728                  cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
2729                  cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count,
2730                  cpi->psnr.stat[V] / cpi->count);
2731
2732         if (cpi->b_calculate_blockiness) {
2733           SNPRINT(headings, "\t  Block\tWstBlck");
2734           SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
2735           SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
2736         }
2737
2738         if (cpi->b_calculate_consistency) {
2739           double consistency =
2740               vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
2741                               (double)cpi->total_inconsistency);
2742
2743           SNPRINT(headings, "\tConsist\tWstCons");
2744           SNPRINT2(results, "\t%7.3f", consistency);
2745           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
2746         }
2747
2748         SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
2749         SNPRINT2(results, "\t%8.0f", total_encode_time);
2750         SNPRINT2(results, "\t%7.2f", rate_err);
2751         SNPRINT2(results, "\t%7.2f", fabs(rate_err));
2752
2753         fprintf(f, "%s\tAPsnr611\n", headings);
2754         fprintf(
2755             f, "%s\t%7.3f\n", results,
2756             (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
2757                 (cpi->count * 8));
2758       }
2759
2760       fclose(f);
2761     }
2762 #endif
2763
2764 #if 0
2765     {
2766       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
2767       printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
2768       printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
2769              cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
2770              cpi->time_compress_data / 1000,
2771              (cpi->time_receive_data + cpi->time_compress_data) / 1000);
2772     }
2773 #endif
2774   }
2775
2776 #if CONFIG_VP9_TEMPORAL_DENOISING
2777   vp9_denoiser_free(&(cpi->denoiser));
2778 #endif
2779
2780   if (cpi->kmeans_data_arr_alloc) {
2781 #if CONFIG_MULTITHREAD
2782     pthread_mutex_destroy(&cpi->kmeans_mutex);
2783 #endif
2784     vpx_free(cpi->kmeans_data_arr);
2785   }
2786
2787   free_tpl_buffer(cpi);
2788
2789   for (t = 0; t < cpi->num_workers; ++t) {
2790     VPxWorker *const worker = &cpi->workers[t];
2791     EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
2792
2793     // Deallocate allocated threads.
2794     vpx_get_worker_interface()->end(worker);
2795
2796     // Deallocate allocated thread data.
2797     if (t < cpi->num_workers - 1) {
2798       vpx_free(thread_data->td->counts);
2799       vp9_free_pc_tree(thread_data->td);
2800       vpx_free(thread_data->td);
2801     }
2802   }
2803   vpx_free(cpi->tile_thr_data);
2804   vpx_free(cpi->workers);
2805   vp9_row_mt_mem_dealloc(cpi);
2806
2807   if (cpi->num_workers > 1) {
2808     vp9_loop_filter_dealloc(&cpi->lf_row_sync);
2809     vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
2810   }
2811
2812 #if !CONFIG_REALTIME_ONLY
2813   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
2814 #endif
2815
2816   dealloc_compressor_data(cpi);
2817
2818   for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
2819        ++i) {
2820     vpx_free(cpi->mbgraph_stats[i].mb_stats);
2821   }
2822
2823 #if CONFIG_FP_MB_STATS
2824   if (cpi->use_fp_mb_stats) {
2825     vpx_free(cpi->twopass.frame_mb_stats_buf);
2826     cpi->twopass.frame_mb_stats_buf = NULL;
2827   }
2828 #endif
2829
2830   vp9_remove_common(cm);
2831   vp9_free_ref_frame_buffers(cm->buffer_pool);
2832 #if CONFIG_VP9_POSTPROC
2833   vp9_free_postproc_buffers(cm);
2834 #endif
2835   vpx_free(cpi);
2836
2837 #if CONFIG_VP9_TEMPORAL_DENOISING
2838 #ifdef OUTPUT_YUV_DENOISED
2839   fclose(yuv_denoised_file);
2840 #endif
2841 #endif
2842 #ifdef OUTPUT_YUV_SKINMAP
2843   fclose(yuv_skinmap_file);
2844 #endif
2845 #ifdef OUTPUT_YUV_REC
2846   fclose(yuv_rec_file);
2847 #endif
2848 #ifdef OUTPUT_YUV_SVC_SRC
2849   fclose(yuv_svc_src[0]);
2850   fclose(yuv_svc_src[1]);
2851   fclose(yuv_svc_src[2]);
2852 #endif
2853
2854 #if 0
2855
2856   if (keyfile)
2857     fclose(keyfile);
2858
2859   if (framepsnr)
2860     fclose(framepsnr);
2861
2862   if (kf_list)
2863     fclose(kf_list);
2864
2865 #endif
2866 }
2867
2868 int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
2869   if (is_psnr_calc_enabled(cpi)) {
2870 #if CONFIG_VP9_HIGHBITDEPTH
2871     vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
2872                          cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
2873 #else
2874     vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
2875 #endif
2876     return 1;
2877   } else {
2878     vp9_zero(*psnr);
2879     return 0;
2880   }
2881 }
2882
2883 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
2884   if (ref_frame_flags > 7) return -1;
2885
2886   cpi->ref_frame_flags = ref_frame_flags;
2887   return 0;
2888 }
2889
2890 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
2891   cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
2892   cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
2893   cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
2894   cpi->ext_refresh_frame_flags_pending = 1;
2895 }
2896
2897 static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
2898     VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
2899   MV_REFERENCE_FRAME ref_frame = NONE;
2900   if (ref_frame_flag == VP9_LAST_FLAG)
2901     ref_frame = LAST_FRAME;
2902   else if (ref_frame_flag == VP9_GOLD_FLAG)
2903     ref_frame = GOLDEN_FRAME;
2904   else if (ref_frame_flag == VP9_ALT_FLAG)
2905     ref_frame = ALTREF_FRAME;
2906
2907   return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
2908 }
2909
2910 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2911                            YV12_BUFFER_CONFIG *sd) {
2912   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2913   if (cfg) {
2914     vpx_yv12_copy_frame(cfg, sd);
2915     return 0;
2916   } else {
2917     return -1;
2918   }
2919 }
2920
2921 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2922                           YV12_BUFFER_CONFIG *sd) {
2923   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2924   if (cfg) {
2925     vpx_yv12_copy_frame(sd, cfg);
2926     return 0;
2927   } else {
2928     return -1;
2929   }
2930 }
2931
2932 int vp9_update_entropy(VP9_COMP *cpi, int update) {
2933   cpi->ext_refresh_frame_context = update;
2934   cpi->ext_refresh_frame_context_pending = 1;
2935   return 0;
2936 }
2937
2938 #ifdef OUTPUT_YUV_REC
2939 void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
2940   YV12_BUFFER_CONFIG *s = cm->frame_to_show;
2941   uint8_t *src = s->y_buffer;
2942   int h = cm->height;
2943
2944 #if CONFIG_VP9_HIGHBITDEPTH
2945   if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
2946     uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
2947
2948     do {
2949       fwrite(src16, s->y_width, 2, yuv_rec_file);
2950       src16 += s->y_stride;
2951     } while (--h);
2952
2953     src16 = CONVERT_TO_SHORTPTR(s->u_buffer);
2954     h = s->uv_height;
2955
2956     do {
2957       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2958       src16 += s->uv_stride;
2959     } while (--h);
2960
2961     src16 = CONVERT_TO_SHORTPTR(s->v_buffer);
2962     h = s->uv_height;
2963
2964     do {
2965       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2966       src16 += s->uv_stride;
2967     } while (--h);
2968
2969     fflush(yuv_rec_file);
2970     return;
2971   }
2972 #endif  // CONFIG_VP9_HIGHBITDEPTH
2973
2974   do {
2975     fwrite(src, s->y_width, 1, yuv_rec_file);
2976     src += s->y_stride;
2977   } while (--h);
2978
2979   src = s->u_buffer;
2980   h = s->uv_height;
2981
2982   do {
2983     fwrite(src, s->uv_width, 1, yuv_rec_file);
2984     src += s->uv_stride;
2985   } while (--h);
2986
2987   src = s->v_buffer;
2988   h = s->uv_height;
2989
2990   do {
2991     fwrite(src, s->uv_width, 1, yuv_rec_file);
2992     src += s->uv_stride;
2993   } while (--h);
2994
2995   fflush(yuv_rec_file);
2996 }
2997 #endif
2998
2999 #if CONFIG_VP9_HIGHBITDEPTH
3000 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3001                                                 YV12_BUFFER_CONFIG *dst,
3002                                                 int bd) {
3003 #else
3004 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3005                                                 YV12_BUFFER_CONFIG *dst) {
3006 #endif  // CONFIG_VP9_HIGHBITDEPTH
3007   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
3008   int i;
3009   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
3010                                    src->v_buffer };
3011   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
3012   const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
3013                               src->uv_crop_width };
3014   const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
3015                                src->uv_crop_height };
3016   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
3017   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
3018   const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
3019                               dst->uv_crop_width };
3020   const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
3021                                dst->uv_crop_height };
3022
3023   for (i = 0; i < MAX_MB_PLANE; ++i) {
3024 #if CONFIG_VP9_HIGHBITDEPTH
3025     if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
3026       vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
3027                               src_strides[i], dsts[i], dst_heights[i],
3028                               dst_widths[i], dst_strides[i], bd);
3029     } else {
3030       vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3031                        dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3032     }
3033 #else
3034     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3035                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3036 #endif  // CONFIG_VP9_HIGHBITDEPTH
3037   }
3038   vpx_extend_frame_borders(dst);
3039 }
3040
3041 #if CONFIG_VP9_HIGHBITDEPTH
3042 static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
3043                                    YV12_BUFFER_CONFIG *dst, int bd,
3044                                    INTERP_FILTER filter_type,
3045                                    int phase_scaler) {
3046   const int src_w = src->y_crop_width;
3047   const int src_h = src->y_crop_height;
3048   const int dst_w = dst->y_crop_width;
3049   const int dst_h = dst->y_crop_height;
3050   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
3051                                    src->v_buffer };
3052   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
3053   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
3054   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
3055   const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
3056   int x, y, i;
3057
3058   for (i = 0; i < MAX_MB_PLANE; ++i) {
3059     const int factor = (i == 0 || i == 3 ? 1 : 2);
3060     const int src_stride = src_strides[i];
3061     const int dst_stride = dst_strides[i];
3062     for (y = 0; y < dst_h; y += 16) {
3063       const int y_q4 = y * (16 / factor) * src_h / dst_h + phase_scaler;
3064       for (x = 0; x < dst_w; x += 16) {
3065         const int x_q4 = x * (16 / factor) * src_w / dst_w + phase_scaler;
3066         const uint8_t *src_ptr = srcs[i] +
3067                                  (y / factor) * src_h / dst_h * src_stride +
3068                                  (x / factor) * src_w / dst_w;
3069         uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
3070
3071         if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
3072           vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
3073                                CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, kernel,
3074                                x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
3075                                16 * src_h / dst_h, 16 / factor, 16 / factor,
3076                                bd);
3077         } else {
3078           vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
3079                         x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
3080                         16 * src_h / dst_h, 16 / factor, 16 / factor);
3081         }
3082       }
3083     }
3084   }
3085
3086   vpx_extend_frame_borders(dst);
3087 }
3088 #endif  // CONFIG_VP9_HIGHBITDEPTH
3089
3090 #if !CONFIG_REALTIME_ONLY
3091 static int scale_down(VP9_COMP *cpi, int q) {
3092   RATE_CONTROL *const rc = &cpi->rc;
3093   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3094   int scale = 0;
3095   assert(frame_is_kf_gf_arf(cpi));
3096
3097   if (rc->frame_size_selector == UNSCALED &&
3098       q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
3099     const int max_size_thresh =
3100         (int)(rate_thresh_mult[SCALE_STEP1] *
3101               VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
3102     scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
3103   }
3104   return scale;
3105 }
3106
3107 static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
3108   const RATE_CONTROL *const rc = &cpi->rc;
3109   int big_miss_high;
3110
3111   if (frame_is_kf_gf_arf(cpi))
3112     big_miss_high = rc->this_frame_target * 3 / 2;
3113   else
3114     big_miss_high = rc->this_frame_target * 2;
3115
3116   return big_miss_high;
3117 }
3118
3119 static int big_rate_miss(VP9_COMP *cpi) {
3120   const RATE_CONTROL *const rc = &cpi->rc;
3121   int big_miss_high;
3122   int big_miss_low;
3123
3124   // Ignore for overlay frames
3125   if (rc->is_src_frame_alt_ref) {
3126     return 0;
3127   } else {
3128     big_miss_low = (rc->this_frame_target / 2);
3129     big_miss_high = big_rate_miss_high_threshold(cpi);
3130
3131     return (rc->projected_frame_size > big_miss_high) ||
3132            (rc->projected_frame_size < big_miss_low);
3133   }
3134 }
3135
3136 // test in two pass for the first
3137 static int two_pass_first_group_inter(VP9_COMP *cpi) {
3138   if (cpi->oxcf.pass == 2) {
3139     TWO_PASS *const twopass = &cpi->twopass;
3140     GF_GROUP *const gf_group = &twopass->gf_group;
3141     const int gfg_index = gf_group->index;
3142
3143     if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
3144     return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
3145            gf_group->update_type[gfg_index] == LF_UPDATE;
3146   } else {
3147     return 0;
3148   }
3149 }
3150
3151 // Function to test for conditions that indicate we should loop
3152 // back and recode a frame.
3153 static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
3154                             int maxq, int minq) {
3155   const RATE_CONTROL *const rc = &cpi->rc;
3156   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
3157   const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
3158   int force_recode = 0;
3159
3160   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3161       big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) ||
3162       (two_pass_first_group_inter(cpi) &&
3163        (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) ||
3164       (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) {
3165     if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
3166         scale_down(cpi, q)) {
3167       // Code this group at a lower resolution.
3168       cpi->resize_pending = 1;
3169       return 1;
3170     }
3171
3172     // Force recode for extreme overshoot.
3173     if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3174         (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
3175          rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) {
3176       return 1;
3177     }
3178
3179     // TODO(agrange) high_limit could be greater than the scale-down threshold.
3180     if ((rc->projected_frame_size > high_limit && q < maxq) ||
3181         (rc->projected_frame_size < low_limit && q > minq)) {
3182       force_recode = 1;
3183     } else if (cpi->oxcf.rc_mode == VPX_CQ) {
3184       // Deal with frame undershoot and whether or not we are
3185       // below the automatically set cq level.
3186       if (q > oxcf->cq_level &&
3187           rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
3188         force_recode = 1;
3189       }
3190     }
3191   }
3192   return force_recode;
3193 }
3194 #endif  // !CONFIG_REALTIME_ONLY
3195
3196 static void update_ref_frames(VP9_COMP *cpi) {
3197   VP9_COMMON *const cm = &cpi->common;
3198   BufferPool *const pool = cm->buffer_pool;
3199   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3200
3201   if (cpi->rc.show_arf_as_gld) {
3202     int tmp = cpi->alt_fb_idx;
3203     cpi->alt_fb_idx = cpi->gld_fb_idx;
3204     cpi->gld_fb_idx = tmp;
3205   } else if (cm->show_existing_frame) {
3206     // Pop ARF.
3207     cpi->lst_fb_idx = cpi->alt_fb_idx;
3208     cpi->alt_fb_idx =
3209         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3210     --gf_group->stack_size;
3211   }
3212
3213   // At this point the new frame has been encoded.
3214   // If any buffer copy / swapping is signaled it should be done here.
3215   if (cm->frame_type == KEY_FRAME) {
3216     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3217                cm->new_fb_idx);
3218     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3219                cm->new_fb_idx);
3220   } else if (vp9_preserve_existing_gf(cpi)) {
3221     // We have decided to preserve the previously existing golden frame as our
3222     // new ARF frame. However, in the short term in function
3223     // vp9_get_refresh_mask() we left it in the GF slot and, if
3224     // we're updating the GF with the current decoded frame, we save it to the
3225     // ARF slot instead.
3226     // We now have to update the ARF with the current frame and swap gld_fb_idx
3227     // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
3228     // slot and, if we're updating the GF, the current frame becomes the new GF.
3229     int tmp;
3230
3231     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3232                cm->new_fb_idx);
3233
3234     tmp = cpi->alt_fb_idx;
3235     cpi->alt_fb_idx = cpi->gld_fb_idx;
3236     cpi->gld_fb_idx = tmp;
3237   } else { /* For non key/golden frames */
3238     if (cpi->refresh_alt_ref_frame) {
3239       int arf_idx = gf_group->top_arf_idx;
3240
3241       // Push new ARF into stack.
3242       stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
3243                  gf_group->stack_size);
3244       ++gf_group->stack_size;
3245
3246       assert(arf_idx < REF_FRAMES);
3247
3248       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
3249       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
3250              cpi->interp_filter_selected[0],
3251              sizeof(cpi->interp_filter_selected[0]));
3252
3253       cpi->alt_fb_idx = arf_idx;
3254     }
3255
3256     if (cpi->refresh_golden_frame) {
3257       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3258                  cm->new_fb_idx);
3259       if (!cpi->rc.is_src_frame_alt_ref)
3260         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3261                cpi->interp_filter_selected[0],
3262                sizeof(cpi->interp_filter_selected[0]));
3263       else
3264         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3265                cpi->interp_filter_selected[ALTREF_FRAME],
3266                sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
3267     }
3268   }
3269
3270   if (cpi->refresh_last_frame) {
3271     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
3272                cm->new_fb_idx);
3273     if (!cpi->rc.is_src_frame_alt_ref)
3274       memcpy(cpi->interp_filter_selected[LAST_FRAME],
3275              cpi->interp_filter_selected[0],
3276              sizeof(cpi->interp_filter_selected[0]));
3277   }
3278
3279   if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
3280     cpi->alt_fb_idx =
3281         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3282     --gf_group->stack_size;
3283   }
3284 }
3285
3286 void vp9_update_reference_frames(VP9_COMP *cpi) {
3287   update_ref_frames(cpi);
3288
3289 #if CONFIG_VP9_TEMPORAL_DENOISING
3290   vp9_denoiser_update_ref_frame(cpi);
3291 #endif
3292
3293   if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
3294 }
3295
3296 static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
3297   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
3298   struct loopfilter *lf = &cm->lf;
3299   int is_reference_frame =
3300       (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
3301        cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
3302   if (cpi->use_svc &&
3303       cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
3304     is_reference_frame = !cpi->svc.non_reference_frame;
3305
3306   // Skip loop filter in show_existing_frame mode.
3307   if (cm->show_existing_frame) {
3308     lf->filter_level = 0;
3309     return;
3310   }
3311
3312   if (xd->lossless) {
3313     lf->filter_level = 0;
3314     lf->last_filt_level = 0;
3315   } else {
3316     struct vpx_usec_timer timer;
3317
3318     vpx_clear_system_state();
3319
3320     vpx_usec_timer_start(&timer);
3321
3322     if (!cpi->rc.is_src_frame_alt_ref) {
3323       if ((cpi->common.frame_type == KEY_FRAME) &&
3324           (!cpi->rc.this_key_frame_forced)) {
3325         lf->last_filt_level = 0;
3326       }
3327       vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
3328       lf->last_filt_level = lf->filter_level;
3329     } else {
3330       lf->filter_level = 0;
3331     }
3332
3333     vpx_usec_timer_mark(&timer);
3334     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
3335   }
3336
3337   if (lf->filter_level > 0 && is_reference_frame) {
3338     vp9_build_mask_frame(cm, lf->filter_level, 0);
3339
3340     if (cpi->num_workers > 1)
3341       vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
3342                                lf->filter_level, 0, 0, cpi->workers,
3343                                cpi->num_workers, &cpi->lf_row_sync);
3344     else
3345       vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
3346   }
3347
3348   vpx_extend_frame_inner_borders(cm->frame_to_show);
3349 }
3350
3351 static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
3352   RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
3353   if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
3354       new_fb_ptr->mi_cols < cm->mi_cols) {
3355     vpx_free(new_fb_ptr->mvs);
3356     CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
3357                     (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
3358                                          sizeof(*new_fb_ptr->mvs)));
3359     new_fb_ptr->mi_rows = cm->mi_rows;
3360     new_fb_ptr->mi_cols = cm->mi_cols;
3361   }
3362 }
3363
3364 void vp9_scale_references(VP9_COMP *cpi) {
3365   VP9_COMMON *cm = &cpi->common;
3366   MV_REFERENCE_FRAME ref_frame;
3367   const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG,
3368                                      VP9_ALT_FLAG };
3369
3370   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3371     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
3372     if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
3373       BufferPool *const pool = cm->buffer_pool;
3374       const YV12_BUFFER_CONFIG *const ref =
3375           get_ref_frame_buffer(cpi, ref_frame);
3376
3377       if (ref == NULL) {
3378         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3379         continue;
3380       }
3381
3382 #if CONFIG_VP9_HIGHBITDEPTH
3383       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3384         RefCntBuffer *new_fb_ptr = NULL;
3385         int force_scaling = 0;
3386         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3387         if (new_fb == INVALID_IDX) {
3388           new_fb = get_free_fb(cm);
3389           force_scaling = 1;
3390         }
3391         if (new_fb == INVALID_IDX) return;
3392         new_fb_ptr = &pool->frame_bufs[new_fb];
3393         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3394             new_fb_ptr->buf.y_crop_height != cm->height) {
3395           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3396                                        cm->subsampling_x, cm->subsampling_y,
3397                                        cm->use_highbitdepth,
3398                                        VP9_ENC_BORDER_IN_PIXELS,
3399                                        cm->byte_alignment, NULL, NULL, NULL))
3400             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3401                                "Failed to allocate frame buffer");
3402           scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth,
3403                                  EIGHTTAP, 0);
3404           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3405           alloc_frame_mvs(cm, new_fb);
3406         }
3407 #else
3408       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3409         RefCntBuffer *new_fb_ptr = NULL;
3410         int force_scaling = 0;
3411         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3412         if (new_fb == INVALID_IDX) {
3413           new_fb = get_free_fb(cm);
3414           force_scaling = 1;
3415         }
3416         if (new_fb == INVALID_IDX) return;
3417         new_fb_ptr = &pool->frame_bufs[new_fb];
3418         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3419             new_fb_ptr->buf.y_crop_height != cm->height) {
3420           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3421                                        cm->subsampling_x, cm->subsampling_y,
3422                                        VP9_ENC_BORDER_IN_PIXELS,
3423                                        cm->byte_alignment, NULL, NULL, NULL))
3424             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3425                                "Failed to allocate frame buffer");
3426           vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
3427           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3428           alloc_frame_mvs(cm, new_fb);
3429         }
3430 #endif  // CONFIG_VP9_HIGHBITDEPTH
3431       } else {
3432         int buf_idx;
3433         RefCntBuffer *buf = NULL;
3434         if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3435           // Check for release of scaled reference.
3436           buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
3437           if (buf_idx != INVALID_IDX) {
3438             buf = &pool->frame_bufs[buf_idx];
3439             --buf->ref_count;
3440             cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3441           }
3442         }
3443         buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3444         buf = &pool->frame_bufs[buf_idx];
3445         buf->buf.y_crop_width = ref->y_crop_width;
3446         buf->buf.y_crop_height = ref->y_crop_height;
3447         cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
3448         ++buf->ref_count;
3449       }
3450     } else {
3451       if (cpi->oxcf.pass != 0 || cpi->use_svc)
3452         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3453     }
3454   }
3455 }
3456
3457 static void release_scaled_references(VP9_COMP *cpi) {
3458   VP9_COMMON *cm = &cpi->common;
3459   int i;
3460   if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3461     // Only release scaled references under certain conditions:
3462     // if reference will be updated, or if scaled reference has same resolution.
3463     int refresh[3];
3464     refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
3465     refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
3466     refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
3467     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3468       const int idx = cpi->scaled_ref_idx[i - 1];
3469       if (idx != INVALID_IDX) {
3470         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3471         const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
3472         if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
3473                                buf->buf.y_crop_height == ref->y_crop_height)) {
3474           --buf->ref_count;
3475           cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
3476         }
3477       }
3478     }
3479   } else {
3480     for (i = 0; i < REFS_PER_FRAME; ++i) {
3481       const int idx = cpi->scaled_ref_idx[i];
3482       if (idx != INVALID_IDX) {
3483         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3484         --buf->ref_count;
3485         cpi->scaled_ref_idx[i] = INVALID_IDX;
3486       }
3487     }
3488   }
3489 }
3490
3491 static void full_to_model_count(unsigned int *model_count,
3492                                 unsigned int *full_count) {
3493   int n;
3494   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
3495   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
3496   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
3497   for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
3498     model_count[TWO_TOKEN] += full_count[n];
3499   model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
3500 }
3501
3502 static void full_to_model_counts(vp9_coeff_count_model *model_count,
3503                                  vp9_coeff_count *full_count) {
3504   int i, j, k, l;
3505
3506   for (i = 0; i < PLANE_TYPES; ++i)
3507     for (j = 0; j < REF_TYPES; ++j)
3508       for (k = 0; k < COEF_BANDS; ++k)
3509         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
3510           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
3511 }
3512
3513 #if 0 && CONFIG_INTERNAL_STATS
3514 static void output_frame_level_debug_stats(VP9_COMP *cpi) {
3515   VP9_COMMON *const cm = &cpi->common;
3516   FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
3517   int64_t recon_err;
3518
3519   vpx_clear_system_state();
3520
3521 #if CONFIG_VP9_HIGHBITDEPTH
3522   if (cm->use_highbitdepth) {
3523     recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3524   } else {
3525     recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3526   }
3527 #else
3528   recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3529 #endif  // CONFIG_VP9_HIGHBITDEPTH
3530
3531
3532   if (cpi->twopass.total_left_stats.coded_error != 0.0) {
3533     double dc_quant_devisor;
3534 #if CONFIG_VP9_HIGHBITDEPTH
3535     switch (cm->bit_depth) {
3536       case VPX_BITS_8:
3537         dc_quant_devisor = 4.0;
3538         break;
3539       case VPX_BITS_10:
3540         dc_quant_devisor = 16.0;
3541         break;
3542       default:
3543         assert(cm->bit_depth == VPX_BITS_12);
3544         dc_quant_devisor = 64.0;
3545         break;
3546     }
3547 #else
3548     dc_quant_devisor = 4.0;
3549 #endif
3550
3551     if (!cm->current_video_frame) {
3552       fprintf(f, "frame, width, height, last ts, last end ts, "
3553           "source_alt_ref_pending, source_alt_ref_active, "
3554           "this_frame_target, projected_frame_size, "
3555           "projected_frame_size / MBs, "
3556           "projected_frame_size - this_frame_target, "
3557           "vbr_bits_off_target, vbr_bits_off_target_fast, "
3558           "twopass.extend_minq, twopass.extend_minq_fast, "
3559           "total_target_vs_actual, "
3560           "starting_buffer_level - bits_off_target, "
3561           "total_actual_bits, base_qindex, q for base_qindex, "
3562           "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
3563           "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
3564           "frame_type, gfu_boost, "
3565           "twopass.bits_left, "
3566           "twopass.total_left_stats.coded_error, "
3567           "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
3568           "tot_recode_hits, recon_err, kf_boost, "
3569           "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
3570           "filter_level, seg.aq_av_offset\n");
3571     }
3572
3573     fprintf(f, "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
3574         "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
3575         "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
3576         "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
3577         "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
3578         cpi->common.current_video_frame,
3579         cm->width, cm->height,
3580         cpi->last_time_stamp_seen,
3581         cpi->last_end_time_stamp_seen,
3582         cpi->rc.source_alt_ref_pending,
3583         cpi->rc.source_alt_ref_active,
3584         cpi->rc.this_frame_target,
3585         cpi->rc.projected_frame_size,
3586         cpi->rc.projected_frame_size / cpi->common.MBs,
3587         (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
3588         cpi->rc.vbr_bits_off_target,
3589         cpi->rc.vbr_bits_off_target_fast,
3590         cpi->twopass.extend_minq,
3591         cpi->twopass.extend_minq_fast,
3592         cpi->rc.total_target_vs_actual,
3593         (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
3594         cpi->rc.total_actual_bits, cm->base_qindex,
3595         vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
3596         (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
3597             dc_quant_devisor,
3598         vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
3599                                 cm->bit_depth),
3600         cpi->rc.avg_q,
3601         vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
3602         cpi->refresh_last_frame, cpi->refresh_golden_frame,
3603         cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
3604         cpi->twopass.bits_left,
3605         cpi->twopass.total_left_stats.coded_error,
3606         cpi->twopass.bits_left /
3607             (1 + cpi->twopass.total_left_stats.coded_error),
3608         cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
3609         cpi->twopass.kf_zeromotion_pct,
3610         cpi->twopass.fr_content_type,
3611         cm->lf.filter_level,
3612         cm->seg.aq_av_offset);
3613   }
3614   fclose(f);
3615
3616   if (0) {
3617     FILE *const fmodes = fopen("Modes.stt", "a");
3618     int i;
3619
3620     fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
3621             cm->frame_type, cpi->refresh_golden_frame,
3622             cpi->refresh_alt_ref_frame);
3623
3624     for (i = 0; i < MAX_MODES; ++i)
3625       fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
3626
3627     fprintf(fmodes, "\n");
3628
3629     fclose(fmodes);
3630   }
3631 }
3632 #endif
3633
3634 static void set_mv_search_params(VP9_COMP *cpi) {
3635   const VP9_COMMON *const cm = &cpi->common;
3636   const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
3637
3638   // Default based on max resolution.
3639   cpi->mv_step_param = vp9_init_search_range(max_mv_def);
3640
3641   if (cpi->sf.mv.auto_mv_step_size) {
3642     if (frame_is_intra_only(cm)) {
3643       // Initialize max_mv_magnitude for use in the first INTER frame
3644       // after a key/intra-only frame.
3645       cpi->max_mv_magnitude = max_mv_def;
3646     } else {
3647       if (cm->show_frame) {
3648         // Allow mv_steps to correspond to twice the max mv magnitude found
3649         // in the previous frame, capped by the default max_mv_magnitude based
3650         // on resolution.
3651         cpi->mv_step_param = vp9_init_search_range(
3652             VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
3653       }
3654       cpi->max_mv_magnitude = 0;
3655     }
3656   }
3657 }
3658
3659 static void set_size_independent_vars(VP9_COMP *cpi) {
3660   vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
3661   vp9_set_rd_speed_thresholds(cpi);
3662   vp9_set_rd_speed_thresholds_sub8x8(cpi);
3663   cpi->common.interp_filter = cpi->sf.default_interp_filter;
3664 }
3665
3666 static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
3667                                     int *top_index) {
3668   VP9_COMMON *const cm = &cpi->common;
3669
3670   // Setup variables that depend on the dimensions of the frame.
3671   vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
3672
3673   // Decide q and q bounds.
3674   *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
3675
3676   if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
3677     *q = cpi->rc.worst_quality;
3678     cpi->rc.force_max_q = 0;
3679   }
3680
3681   if (!frame_is_intra_only(cm)) {
3682     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
3683   }
3684
3685 #if !CONFIG_REALTIME_ONLY
3686   // Configure experimental use of segmentation for enhanced coding of
3687   // static regions if indicated.
3688   // Only allowed in the second pass of a two pass encode, as it requires
3689   // lagged coding, and if the relevant speed feature flag is set.
3690   if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
3691     configure_static_seg_features(cpi);
3692 #endif  // !CONFIG_REALTIME_ONLY
3693
3694 #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
3695   if (cpi->oxcf.noise_sensitivity > 0) {
3696     int l = 0;
3697     switch (cpi->oxcf.noise_sensitivity) {
3698       case 1: l = 20; break;
3699       case 2: l = 40; break;
3700       case 3: l = 60; break;
3701       case 4:
3702       case 5: l = 100; break;
3703       case 6: l = 150; break;
3704     }
3705     if (!cpi->common.postproc_state.limits) {
3706       cpi->common.postproc_state.limits =
3707           vpx_calloc(cpi->un_scaled_source->y_width,
3708                      sizeof(*cpi->common.postproc_state.limits));
3709     }
3710     vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
3711                 cpi->common.postproc_state.limits);
3712   }
3713 #endif  // CONFIG_VP9_POSTPROC
3714 }
3715
3716 static void init_motion_estimation(VP9_COMP *cpi) {
3717   int y_stride = cpi->scaled_source.y_stride;
3718
3719   if (cpi->sf.mv.search_method == NSTEP) {
3720     vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
3721   } else if (cpi->sf.mv.search_method == DIAMOND) {
3722     vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
3723   }
3724 }
3725
3726 static void set_frame_size(VP9_COMP *cpi) {
3727   int ref_frame;
3728   VP9_COMMON *const cm = &cpi->common;
3729   VP9EncoderConfig *const oxcf = &cpi->oxcf;
3730   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
3731
3732 #if !CONFIG_REALTIME_ONLY
3733   if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
3734       ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
3735        (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
3736     calculate_coded_size(cpi, &oxcf->scaled_frame_width,
3737                          &oxcf->scaled_frame_height);
3738
3739     // There has been a change in frame size.
3740     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3741                          oxcf->scaled_frame_height);
3742   }
3743 #endif  // !CONFIG_REALTIME_ONLY
3744
3745   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && !cpi->use_svc &&
3746       oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
3747     oxcf->scaled_frame_width =
3748         (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
3749     oxcf->scaled_frame_height =
3750         (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den;
3751     // There has been a change in frame size.
3752     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3753                          oxcf->scaled_frame_height);
3754
3755     // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
3756     set_mv_search_params(cpi);
3757
3758     vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
3759 #if CONFIG_VP9_TEMPORAL_DENOISING
3760     // Reset the denoiser on the resized frame.
3761     if (cpi->oxcf.noise_sensitivity > 0) {
3762       vp9_denoiser_free(&(cpi->denoiser));
3763       setup_denoiser_buffer(cpi);
3764       // Dynamic resize is only triggered for non-SVC, so we can force
3765       // golden frame update here as temporary fix to denoiser.
3766       cpi->refresh_golden_frame = 1;
3767     }
3768 #endif
3769   }
3770
3771   if ((oxcf->pass == 2) && !cpi->use_svc) {
3772     vp9_set_target_rate(cpi);
3773   }
3774
3775   alloc_frame_mvs(cm, cm->new_fb_idx);
3776
3777   // Reset the frame pointers to the current frame size.
3778   if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
3779                                cm->subsampling_x, cm->subsampling_y,
3780 #if CONFIG_VP9_HIGHBITDEPTH
3781                                cm->use_highbitdepth,
3782 #endif
3783                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
3784                                NULL, NULL, NULL))
3785     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3786                        "Failed to allocate frame buffer");
3787
3788   alloc_util_frame_buffers(cpi);
3789   init_motion_estimation(cpi);
3790
3791   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3792     RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
3793     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3794
3795     ref_buf->idx = buf_idx;
3796
3797     if (buf_idx != INVALID_IDX) {
3798       YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
3799       ref_buf->buf = buf;
3800 #if CONFIG_VP9_HIGHBITDEPTH
3801       vp9_setup_scale_factors_for_frame(
3802           &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
3803           cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
3804 #else
3805       vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
3806                                         buf->y_crop_height, cm->width,
3807                                         cm->height);
3808 #endif  // CONFIG_VP9_HIGHBITDEPTH
3809       if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
3810     } else {
3811       ref_buf->buf = NULL;
3812     }
3813   }
3814
3815   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
3816 }
3817
3818 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3819 static void save_encode_params(VP9_COMP *cpi) {
3820   VP9_COMMON *const cm = &cpi->common;
3821   const int tile_cols = 1 << cm->log2_tile_cols;
3822   const int tile_rows = 1 << cm->log2_tile_rows;
3823   int tile_col, tile_row;
3824   int i, j;
3825   RD_OPT *rd_opt = &cpi->rd;
3826   for (i = 0; i < MAX_REF_FRAMES; i++) {
3827     for (j = 0; j < REFERENCE_MODES; j++)
3828       rd_opt->prediction_type_threshes_prev[i][j] =
3829           rd_opt->prediction_type_threshes[i][j];
3830
3831     for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
3832       rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
3833   }
3834
3835   if (cpi->tile_data != NULL) {
3836     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
3837       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
3838         TileDataEnc *tile_data =
3839             &cpi->tile_data[tile_row * tile_cols + tile_col];
3840         for (i = 0; i < BLOCK_SIZES; ++i) {
3841           for (j = 0; j < MAX_MODES; ++j) {
3842             tile_data->thresh_freq_fact_prev[i][j] =
3843                 tile_data->thresh_freq_fact[i][j];
3844           }
3845         }
3846       }
3847   }
3848 }
3849 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3850
3851 static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
3852 #ifdef ENABLE_KF_DENOISE
3853   if (is_spatial_denoise_enabled(cpi)) {
3854     cpi->raw_source_frame = vp9_scale_if_required(
3855         cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3856         (oxcf->pass == 0), EIGHTTAP, 0);
3857   } else {
3858     cpi->raw_source_frame = cpi->Source;
3859   }
3860 #else
3861   cpi->raw_source_frame = cpi->Source;
3862 #endif
3863 }
3864
3865 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
3866                                       uint8_t *dest) {
3867   VP9_COMMON *const cm = &cpi->common;
3868   SVC *const svc = &cpi->svc;
3869   int q = 0, bottom_index = 0, top_index = 0;
3870   int no_drop_scene_change = 0;
3871   const INTERP_FILTER filter_scaler =
3872       (is_one_pass_cbr_svc(cpi))
3873           ? svc->downsample_filter_type[svc->spatial_layer_id]
3874           : EIGHTTAP;
3875   const int phase_scaler =
3876       (is_one_pass_cbr_svc(cpi))
3877           ? svc->downsample_filter_phase[svc->spatial_layer_id]
3878           : 0;
3879
3880   if (cm->show_existing_frame) {
3881     cpi->rc.this_frame_target = 0;
3882     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
3883     return 1;
3884   }
3885
3886   svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
3887
3888   // Flag to check if its valid to compute the source sad (used for
3889   // scene detection and for superblock content state in CBR mode).
3890   // The flag may get reset below based on SVC or resizing state.
3891   cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;
3892
3893   vpx_clear_system_state();
3894
3895   set_frame_size(cpi);
3896
3897   if (is_one_pass_cbr_svc(cpi) &&
3898       cpi->un_scaled_source->y_width == cm->width << 2 &&
3899       cpi->un_scaled_source->y_height == cm->height << 2 &&
3900       svc->scaled_temp.y_width == cm->width << 1 &&
3901       svc->scaled_temp.y_height == cm->height << 1) {
3902     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
3903     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
3904     // result will be saved in scaled_temp and might be used later.
3905     const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
3906     const int phase_scaler2 = svc->downsample_filter_phase[1];
3907     cpi->Source = vp9_svc_twostage_scale(
3908         cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
3909         filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
3910     svc->scaled_one_half = 1;
3911   } else if (is_one_pass_cbr_svc(cpi) &&
3912              cpi->un_scaled_source->y_width == cm->width << 1 &&
3913              cpi->un_scaled_source->y_height == cm->height << 1 &&
3914              svc->scaled_one_half) {
3915     // If the spatial layer is 1/2x1/2 and the scaling is already done in the
3916     // two-stage scaling, use the result directly.
3917     cpi->Source = &svc->scaled_temp;
3918     svc->scaled_one_half = 0;
3919   } else {
3920     cpi->Source = vp9_scale_if_required(
3921         cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
3922         filter_scaler, phase_scaler);
3923   }
3924 #ifdef OUTPUT_YUV_SVC_SRC
3925   // Write out at most 3 spatial layers.
3926   if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
3927     vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
3928   }
3929 #endif
3930   // Unfiltered raw source used in metrics calculation if the source
3931   // has been filtered.
3932   if (is_psnr_calc_enabled(cpi)) {
3933 #ifdef ENABLE_KF_DENOISE
3934     if (is_spatial_denoise_enabled(cpi)) {
3935       cpi->raw_source_frame = vp9_scale_if_required(
3936           cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3937           (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
3938     } else {
3939       cpi->raw_source_frame = cpi->Source;
3940     }
3941 #else
3942     cpi->raw_source_frame = cpi->Source;
3943 #endif
3944   }
3945
3946   if ((cpi->use_svc &&
3947        (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
3948         svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
3949         svc->current_superframe < 1)) ||
3950       cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
3951       cpi->resize_state != ORIG) {
3952     cpi->compute_source_sad_onepass = 0;
3953     if (cpi->content_state_sb_fd != NULL)
3954       memset(cpi->content_state_sb_fd, 0,
3955              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
3956                  sizeof(*cpi->content_state_sb_fd));
3957   }
3958
3959   // Avoid scaling last_source unless its needed.
3960   // Last source is needed if avg_source_sad() is used, or if
3961   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
3962   // estimation is enabled.
3963   if (cpi->unscaled_last_source != NULL &&
3964       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3965        (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
3966         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
3967        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
3968        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
3969        cpi->compute_source_sad_onepass))
3970     cpi->Last_Source = vp9_scale_if_required(
3971         cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
3972         (cpi->oxcf.pass == 0), EIGHTTAP, 0);
3973
3974   if (cpi->Last_Source == NULL ||
3975       cpi->Last_Source->y_width != cpi->Source->y_width ||
3976       cpi->Last_Source->y_height != cpi->Source->y_height)
3977     cpi->compute_source_sad_onepass = 0;
3978
3979   if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
3980     memset(cpi->consec_zero_mv, 0,
3981            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
3982   }
3983
3984 #if CONFIG_VP9_TEMPORAL_DENOISING
3985   if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
3986     vp9_denoiser_reset_on_first_frame(cpi);
3987 #endif
3988
3989   // Scene detection is always used for VBR mode or screen-content case.
3990   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
3991   // (need to check encoding time cost for doing this for speed 8).
3992   cpi->rc.high_source_sad = 0;
3993   cpi->rc.hybrid_intra_scene_change = 0;
3994   cpi->rc.re_encode_maxq_scene_change = 0;
3995   if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
3996       (cpi->oxcf.rc_mode == VPX_VBR ||
3997        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3998        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
3999     vp9_scene_detection_onepass(cpi);
4000
4001   if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
4002     svc->high_source_sad_superframe = cpi->rc.high_source_sad;
4003     svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
4004     // On scene change reset temporal layer pattern to TL0.
4005     // Note that if the base/lower spatial layers are skipped: instead of
4006     // inserting base layer here, we force max-q for the next superframe
4007     // with lower spatial layers: this is done in vp9_encodedframe_overshoot()
4008     // when max-q is decided for the current layer.
4009     // Only do this reset for bypass/flexible mode.
4010     if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
4011         svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
4012       // rc->high_source_sad will get reset so copy it to restore it.
4013       int tmp_high_source_sad = cpi->rc.high_source_sad;
4014       vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
4015       cpi->rc.high_source_sad = tmp_high_source_sad;
4016     }
4017   }
4018
4019   vp9_update_noise_estimate(cpi);
4020
4021   // For 1 pass CBR, check if we are dropping this frame.
4022   // Never drop on key frame, if base layer is key for svc,
4023   // on scene change, or if superframe has layer sync.
4024   if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
4025       !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
4026     no_drop_scene_change = 1;
4027   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
4028       !frame_is_intra_only(cm) && !no_drop_scene_change &&
4029       !svc->superframe_has_layer_sync &&
4030       (!cpi->use_svc ||
4031        !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
4032     if (vp9_rc_drop_frame(cpi)) return 0;
4033   }
4034
4035   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
4036   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
4037   // avoid this frame-level upsampling (for non intra_only frames).
4038   if (frame_is_intra_only(cm) == 0 &&
4039       !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) {
4040     vp9_scale_references(cpi);
4041   }
4042
4043   set_size_independent_vars(cpi);
4044   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4045
4046   // search method and step parameter might be changed in speed settings.
4047   init_motion_estimation(cpi);
4048
4049   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
4050
4051   if (cpi->sf.svc_use_lowres_part &&
4052       svc->spatial_layer_id == svc->number_spatial_layers - 2) {
4053     if (svc->prev_partition_svc == NULL) {
4054       CHECK_MEM_ERROR(
4055           cm, svc->prev_partition_svc,
4056           (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
4057                                    sizeof(*svc->prev_partition_svc)));
4058     }
4059   }
4060
4061   // TODO(jianj): Look into issue of skin detection with high bitdepth.
4062   if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
4063       cpi->oxcf.rc_mode == VPX_CBR &&
4064       cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
4065       cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4066     cpi->use_skin_detection = 1;
4067   }
4068
4069   // Enable post encode frame dropping for CBR on non key frame, when
4070   // ext_use_post_encode_drop is specified by user.
4071   cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
4072                                  cpi->oxcf.rc_mode == VPX_CBR &&
4073                                  cm->frame_type != KEY_FRAME;
4074
4075   vp9_set_quantizer(cpi, q);
4076   vp9_set_variance_partition_thresholds(cpi, q, 0);
4077
4078   setup_frame(cpi);
4079
4080   suppress_active_map(cpi);
4081
4082   if (cpi->use_svc) {
4083     // On non-zero spatial layer, check for disabling inter-layer
4084     // prediction.
4085     if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
4086     vp9_svc_assert_constraints_pattern(cpi);
4087   }
4088
4089   if (cpi->rc.last_post_encode_dropped_scene_change) {
4090     cpi->rc.high_source_sad = 1;
4091     svc->high_source_sad_superframe = 1;
4092     // For now disable use_source_sad since Last_Source will not be the previous
4093     // encoded but the dropped one.
4094     cpi->sf.use_source_sad = 0;
4095     cpi->rc.last_post_encode_dropped_scene_change = 0;
4096   }
4097   // Check if this high_source_sad (scene/slide change) frame should be
4098   // encoded at high/max QP, and if so, set the q and adjust some rate
4099   // control parameters.
4100   if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
4101       (cpi->rc.high_source_sad ||
4102        (cpi->use_svc && svc->high_source_sad_superframe))) {
4103     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
4104       vp9_set_quantizer(cpi, q);
4105       vp9_set_variance_partition_thresholds(cpi, q, 0);
4106     }
4107   }
4108
4109 #if !CONFIG_REALTIME_ONLY
4110   // Variance adaptive and in frame q adjustment experiments are mutually
4111   // exclusive.
4112   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
4113     vp9_vaq_frame_setup(cpi);
4114   } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
4115     vp9_360aq_frame_setup(cpi);
4116   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
4117     vp9_setup_in_frame_q_adj(cpi);
4118   } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
4119     // it may be pretty bad for rate-control,
4120     // and I should handle it somehow
4121     vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4122   } else {
4123 #endif
4124     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4125       vp9_cyclic_refresh_setup(cpi);
4126     } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
4127       apply_roi_map(cpi);
4128     }
4129 #if !CONFIG_REALTIME_ONLY
4130   }
4131 #endif
4132
4133   apply_active_map(cpi);
4134
4135   vp9_encode_frame(cpi);
4136
4137   // Check if we should re-encode this frame at high Q because of high
4138   // overshoot based on the encoded frame size. Only for frames where
4139   // high temporal-source SAD is detected.
4140   // For SVC: all spatial layers are checked for re-encoding.
4141   if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
4142       (cpi->rc.high_source_sad ||
4143        (cpi->use_svc && svc->high_source_sad_superframe))) {
4144     int frame_size = 0;
4145     // Get an estimate of the encoded frame size.
4146     save_coding_context(cpi);
4147     vp9_pack_bitstream(cpi, dest, size);
4148     restore_coding_context(cpi);
4149     frame_size = (int)(*size) << 3;
4150     // Check if encoded frame will overshoot too much, and if so, set the q and
4151     // adjust some rate control parameters, and return to re-encode the frame.
4152     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
4153       vpx_clear_system_state();
4154       vp9_set_quantizer(cpi, q);
4155       vp9_set_variance_partition_thresholds(cpi, q, 0);
4156       suppress_active_map(cpi);
4157       // Turn-off cyclic refresh for re-encoded frame.
4158       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4159         CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
4160         unsigned char *const seg_map = cpi->segmentation_map;
4161         memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
4162         memset(cr->last_coded_q_map, MAXQ,
4163                cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
4164         cr->sb_index = 0;
4165         vp9_disable_segmentation(&cm->seg);
4166       }
4167       apply_active_map(cpi);
4168       vp9_encode_frame(cpi);
4169     }
4170   }
4171
4172   // Update some stats from cyclic refresh, and check for golden frame update.
4173   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
4174       !frame_is_intra_only(cm))
4175     vp9_cyclic_refresh_postencode(cpi);
4176
4177   // Update the skip mb flag probabilities based on the distribution
4178   // seen in the last encoder iteration.
4179   // update_base_skip_probs(cpi);
4180   vpx_clear_system_state();
4181   return 1;
4182 }
4183
4184 #if !CONFIG_REALTIME_ONLY
4185 #define MAX_QSTEP_ADJ 4
4186 static int get_qstep_adj(int rate_excess, int rate_limit) {
4187   int qstep =
4188       rate_limit ? ((rate_excess + rate_limit / 2) / rate_limit) : INT_MAX;
4189   return VPXMIN(qstep, MAX_QSTEP_ADJ);
4190 }
4191
4192 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
4193                                     uint8_t *dest) {
4194   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4195   VP9_COMMON *const cm = &cpi->common;
4196   RATE_CONTROL *const rc = &cpi->rc;
4197   int bottom_index, top_index;
4198   int loop_count = 0;
4199   int loop_at_this_size = 0;
4200   int loop = 0;
4201   int overshoot_seen = 0;
4202   int undershoot_seen = 0;
4203   int frame_over_shoot_limit;
4204   int frame_under_shoot_limit;
4205   int q = 0, q_low = 0, q_high = 0;
4206   int enable_acl;
4207 #ifdef AGGRESSIVE_VBR
4208   int qrange_adj = 1;
4209 #endif
4210
4211   if (cm->show_existing_frame) {
4212     rc->this_frame_target = 0;
4213     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
4214     return;
4215   }
4216
4217   set_size_independent_vars(cpi);
4218
4219   enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
4220                                        (cpi->twopass.gf_group.index == 1)
4221                                  : 0;
4222
4223   do {
4224     vpx_clear_system_state();
4225
4226     set_frame_size(cpi);
4227
4228     if (loop_count == 0 || cpi->resize_pending != 0) {
4229       set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4230
4231 #ifdef AGGRESSIVE_VBR
4232       if (two_pass_first_group_inter(cpi)) {
4233         // Adjustment limits for min and max q
4234         qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2);
4235
4236         bottom_index =
4237             VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q);
4238         top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2);
4239       }
4240 #endif
4241       // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
4242       set_mv_search_params(cpi);
4243
4244       // Reset the loop state for new frame size.
4245       overshoot_seen = 0;
4246       undershoot_seen = 0;
4247
4248       // Reconfiguration for change in frame size has concluded.
4249       cpi->resize_pending = 0;
4250
4251       q_low = bottom_index;
4252       q_high = top_index;
4253
4254       loop_at_this_size = 0;
4255     }
4256
4257     // Decide frame size bounds first time through.
4258     if (loop_count == 0) {
4259       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
4260                                        &frame_under_shoot_limit,
4261                                        &frame_over_shoot_limit);
4262     }
4263
4264     cpi->Source =
4265         vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
4266                               (oxcf->pass == 0), EIGHTTAP, 0);
4267
4268     // Unfiltered raw source used in metrics calculation if the source
4269     // has been filtered.
4270     if (is_psnr_calc_enabled(cpi)) {
4271 #ifdef ENABLE_KF_DENOISE
4272       if (is_spatial_denoise_enabled(cpi)) {
4273         cpi->raw_source_frame = vp9_scale_if_required(
4274             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
4275             (oxcf->pass == 0), EIGHTTAP, 0);
4276       } else {
4277         cpi->raw_source_frame = cpi->Source;
4278       }
4279 #else
4280       cpi->raw_source_frame = cpi->Source;
4281 #endif
4282     }
4283
4284     if (cpi->unscaled_last_source != NULL)
4285       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
4286                                                &cpi->scaled_last_source,
4287                                                (oxcf->pass == 0), EIGHTTAP, 0);
4288
4289     if (frame_is_intra_only(cm) == 0) {
4290       if (loop_count > 0) {
4291         release_scaled_references(cpi);
4292       }
4293       vp9_scale_references(cpi);
4294     }
4295
4296 #if CONFIG_RATE_CTRL
4297     // TODO(angiebird): This is a hack for making sure the encoder use the
4298     // external_quantize_index exactly. Avoid this kind of hack later.
4299     if (cpi->encode_command.use_external_quantize_index) {
4300       q = cpi->encode_command.external_quantize_index;
4301     }
4302 #endif
4303
4304     vp9_set_quantizer(cpi, q);
4305
4306     if (loop_count == 0) setup_frame(cpi);
4307
4308     // Variance adaptive and in frame q adjustment experiments are mutually
4309     // exclusive.
4310     if (oxcf->aq_mode == VARIANCE_AQ) {
4311       vp9_vaq_frame_setup(cpi);
4312     } else if (oxcf->aq_mode == EQUATOR360_AQ) {
4313       vp9_360aq_frame_setup(cpi);
4314     } else if (oxcf->aq_mode == COMPLEXITY_AQ) {
4315       vp9_setup_in_frame_q_adj(cpi);
4316     } else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
4317       vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4318     } else if (oxcf->aq_mode == PSNR_AQ) {
4319       vp9_psnr_aq_mode_setup(&cm->seg);
4320     }
4321
4322     vp9_encode_frame(cpi);
4323
4324     // Update the skip mb flag probabilities based on the distribution
4325     // seen in the last encoder iteration.
4326     // update_base_skip_probs(cpi);
4327
4328     vpx_clear_system_state();
4329
4330     // Dummy pack of the bitstream using up to date stats to get an
4331     // accurate estimate of output frame size to determine if we need
4332     // to recode.
4333     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4334       save_coding_context(cpi);
4335       if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
4336
4337       rc->projected_frame_size = (int)(*size) << 3;
4338
4339       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
4340     }
4341
4342 #if CONFIG_RATE_CTRL
4343     // This part needs to be after save_coding_context() because
4344     // restore_coding_context will be called in the end of this function.
4345     // TODO(angiebird): This is a hack for making sure the encoder use the
4346     // external_quantize_index exactly. Avoid this kind of hack later.
4347     if (cpi->encode_command.use_external_quantize_index) {
4348       break;
4349     }
4350 #endif
4351
4352     if (oxcf->rc_mode == VPX_Q) {
4353       loop = 0;
4354     } else {
4355       if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
4356           (rc->projected_frame_size < rc->max_frame_bandwidth)) {
4357         int last_q = q;
4358         int64_t kf_err;
4359
4360         int64_t high_err_target = cpi->ambient_err;
4361         int64_t low_err_target = cpi->ambient_err >> 1;
4362
4363 #if CONFIG_VP9_HIGHBITDEPTH
4364         if (cm->use_highbitdepth) {
4365           kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4366         } else {
4367           kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4368         }
4369 #else
4370         kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4371 #endif  // CONFIG_VP9_HIGHBITDEPTH
4372
4373         // Prevent possible divide by zero error below for perfect KF
4374         kf_err += !kf_err;
4375
4376         // The key frame is not good enough or we can afford
4377         // to make it better without undue risk of popping.
4378         if ((kf_err > high_err_target &&
4379              rc->projected_frame_size <= frame_over_shoot_limit) ||
4380             (kf_err > low_err_target &&
4381              rc->projected_frame_size <= frame_under_shoot_limit)) {
4382           // Lower q_high
4383           q_high = q > q_low ? q - 1 : q_low;
4384
4385           // Adjust Q
4386           q = (int)((q * high_err_target) / kf_err);
4387           q = VPXMIN(q, (q_high + q_low) >> 1);
4388         } else if (kf_err < low_err_target &&
4389                    rc->projected_frame_size >= frame_under_shoot_limit) {
4390           // The key frame is much better than the previous frame
4391           // Raise q_low
4392           q_low = q < q_high ? q + 1 : q_high;
4393
4394           // Adjust Q
4395           q = (int)((q * low_err_target) / kf_err);
4396           q = VPXMIN(q, (q_high + q_low + 1) >> 1);
4397         }
4398
4399         // Clamp Q to upper and lower limits:
4400         q = clamp(q, q_low, q_high);
4401
4402         loop = q != last_q;
4403       } else if (recode_loop_test(cpi, frame_over_shoot_limit,
4404                                   frame_under_shoot_limit, q,
4405                                   VPXMAX(q_high, top_index), bottom_index)) {
4406         // Is the projected frame size out of range and are we allowed
4407         // to attempt to recode.
4408         int last_q = q;
4409         int retries = 0;
4410         int qstep;
4411
4412         if (cpi->resize_pending == 1) {
4413           // Change in frame size so go back around the recode loop.
4414           cpi->rc.frame_size_selector =
4415               SCALE_STEP1 - cpi->rc.frame_size_selector;
4416           cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
4417
4418 #if CONFIG_INTERNAL_STATS
4419           ++cpi->tot_recode_hits;
4420 #endif
4421           ++loop_count;
4422           loop = 1;
4423           continue;
4424         }
4425
4426         // Frame size out of permitted range:
4427         // Update correction factor & compute new Q to try...
4428
4429         // Frame is too large
4430         if (rc->projected_frame_size > rc->this_frame_target) {
4431           // Special case if the projected size is > the max allowed.
4432           if ((q == q_high) &&
4433               ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
4434                (!rc->is_src_frame_alt_ref &&
4435                 (rc->projected_frame_size >=
4436                  big_rate_miss_high_threshold(cpi))))) {
4437             int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
4438                                             big_rate_miss_high_threshold(cpi)));
4439             double q_val_high;
4440             q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
4441             q_val_high =
4442                 q_val_high * ((double)rc->projected_frame_size / max_rate);
4443             q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
4444             q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
4445           }
4446
4447           // Raise Qlow as to at least the current value
4448           qstep =
4449               get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
4450           q_low = VPXMIN(q + qstep, q_high);
4451
4452           if (undershoot_seen || loop_at_this_size > 1) {
4453             // Update rate_correction_factor unless
4454             vp9_rc_update_rate_correction_factors(cpi);
4455
4456             q = (q_high + q_low + 1) / 2;
4457           } else {
4458             // Update rate_correction_factor unless
4459             vp9_rc_update_rate_correction_factors(cpi);
4460
4461             q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4462                                   VPXMAX(q_high, top_index));
4463
4464             while (q < q_low && retries < 10) {
4465               vp9_rc_update_rate_correction_factors(cpi);
4466               q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4467                                     VPXMAX(q_high, top_index));
4468               retries++;
4469             }
4470           }
4471
4472           overshoot_seen = 1;
4473         } else {
4474           // Frame is too small
4475           qstep =
4476               get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
4477           q_high = VPXMAX(q - qstep, q_low);
4478
4479           if (overshoot_seen || loop_at_this_size > 1) {
4480             vp9_rc_update_rate_correction_factors(cpi);
4481             q = (q_high + q_low) / 2;
4482           } else {
4483             vp9_rc_update_rate_correction_factors(cpi);
4484             q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4485                                   VPXMIN(q_low, bottom_index), top_index);
4486             // Special case reset for qlow for constrained quality.
4487             // This should only trigger where there is very substantial
4488             // undershoot on a frame and the auto cq level is above
4489             // the user passed in value.
4490             if (oxcf->rc_mode == VPX_CQ && q < q_low) {
4491               q_low = q;
4492             }
4493
4494             while (q > q_high && retries < 10) {
4495               vp9_rc_update_rate_correction_factors(cpi);
4496               q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4497                                     VPXMIN(q_low, bottom_index), top_index);
4498               retries++;
4499             }
4500           }
4501           undershoot_seen = 1;
4502         }
4503
4504         // Clamp Q to upper and lower limits:
4505         q = clamp(q, q_low, q_high);
4506
4507         loop = (q != last_q);
4508       } else {
4509         loop = 0;
4510       }
4511     }
4512
4513     // Special case for overlay frame.
4514     if (rc->is_src_frame_alt_ref &&
4515         rc->projected_frame_size < rc->max_frame_bandwidth)
4516       loop = 0;
4517
4518     if (loop) {
4519       ++loop_count;
4520       ++loop_at_this_size;
4521
4522 #if CONFIG_INTERNAL_STATS
4523       ++cpi->tot_recode_hits;
4524 #endif
4525     }
4526
4527     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
4528       if (loop) restore_coding_context(cpi);
4529   } while (loop);
4530
4531 #ifdef AGGRESSIVE_VBR
4532   if (two_pass_first_group_inter(cpi)) {
4533     cpi->twopass.active_worst_quality =
4534         VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
4535   } else if (!frame_is_kf_gf_arf(cpi)) {
4536 #else
4537   if (!frame_is_kf_gf_arf(cpi)) {
4538 #endif
4539     // Have we been forced to adapt Q outside the expected range by an extreme
4540     // rate miss. If so adjust the active maxQ for the subsequent frames.
4541     if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
4542       cpi->twopass.active_worst_quality = q;
4543     } else if (oxcf->vbr_corpus_complexity && q == q_low &&
4544                rc->projected_frame_size < rc->this_frame_target) {
4545       cpi->twopass.active_worst_quality =
4546           VPXMAX(q, cpi->twopass.active_worst_quality - 1);
4547     }
4548   }
4549
4550   if (enable_acl) {
4551     // Skip recoding, if model diff is below threshold
4552     const int thresh = compute_context_model_thresh(cpi);
4553     const int diff = compute_context_model_diff(cm);
4554     if (diff >= thresh) {
4555       vp9_encode_frame(cpi);
4556     }
4557   }
4558   if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4559     vpx_clear_system_state();
4560     restore_coding_context(cpi);
4561   }
4562 }
4563 #endif  // !CONFIG_REALTIME_ONLY
4564
4565 static int get_ref_frame_flags(const VP9_COMP *cpi) {
4566   const int *const map = cpi->common.ref_frame_map;
4567   const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
4568   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
4569   const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
4570   int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
4571
4572   if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
4573
4574   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
4575       (cpi->svc.number_temporal_layers == 1 &&
4576        cpi->svc.number_spatial_layers == 1))
4577     flags &= ~VP9_GOLD_FLAG;
4578
4579   if (alt_is_last) flags &= ~VP9_ALT_FLAG;
4580
4581   if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
4582
4583   return flags;
4584 }
4585
4586 static void set_ext_overrides(VP9_COMP *cpi) {
4587   // Overrides the defaults with the externally supplied values with
4588   // vp9_update_reference() and vp9_update_entropy() calls
4589   // Note: The overrides are valid only for the next frame passed
4590   // to encode_frame_to_data_rate() function
4591   if (cpi->ext_refresh_frame_context_pending) {
4592     cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
4593     cpi->ext_refresh_frame_context_pending = 0;
4594   }
4595   if (cpi->ext_refresh_frame_flags_pending) {
4596     cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
4597     cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
4598     cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
4599   }
4600 }
4601
4602 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
4603     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4604     YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
4605     int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) {
4606   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4607       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4608 #if CONFIG_VP9_HIGHBITDEPTH
4609     if (cm->bit_depth == VPX_BITS_8) {
4610       vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4611                                  phase_scaler2);
4612       vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type,
4613                                  phase_scaler);
4614     } else {
4615       scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
4616                              filter_type2, phase_scaler2);
4617       scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
4618                              filter_type, phase_scaler);
4619     }
4620 #else
4621     vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4622                                phase_scaler2);
4623     vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler);
4624 #endif  // CONFIG_VP9_HIGHBITDEPTH
4625     return scaled;
4626   } else {
4627     return unscaled;
4628   }
4629 }
4630
4631 YV12_BUFFER_CONFIG *vp9_scale_if_required(
4632     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4633     int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) {
4634   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4635       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4636 #if CONFIG_VP9_HIGHBITDEPTH
4637     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4638         unscaled->y_height <= (scaled->y_height << 1))
4639       if (cm->bit_depth == VPX_BITS_8)
4640         vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4641       else
4642         scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
4643                                filter_type, phase_scaler);
4644     else
4645       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
4646 #else
4647     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4648         unscaled->y_height <= (scaled->y_height << 1))
4649       vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4650     else
4651       scale_and_extend_frame_nonnormative(unscaled, scaled);
4652 #endif  // CONFIG_VP9_HIGHBITDEPTH
4653     return scaled;
4654   } else {
4655     return unscaled;
4656   }
4657 }
4658
4659 static void set_ref_sign_bias(VP9_COMP *cpi) {
4660   VP9_COMMON *const cm = &cpi->common;
4661   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4662   const int cur_frame_index = ref_buffer->frame_index;
4663   MV_REFERENCE_FRAME ref_frame;
4664
4665   for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
4666     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
4667     const RefCntBuffer *const ref_cnt_buf =
4668         get_ref_cnt_buffer(&cpi->common, buf_idx);
4669     if (ref_cnt_buf) {
4670       cm->ref_frame_sign_bias[ref_frame] =
4671           cur_frame_index < ref_cnt_buf->frame_index;
4672     }
4673   }
4674 }
4675
4676 static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
4677   INTERP_FILTER ifilter;
4678   int ref_total[MAX_REF_FRAMES] = { 0 };
4679   MV_REFERENCE_FRAME ref;
4680   int mask = 0;
4681   if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
4682     return mask;
4683   for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
4684     for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
4685       ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
4686
4687   for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
4688     if ((ref_total[LAST_FRAME] &&
4689          cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
4690         (ref_total[GOLDEN_FRAME] == 0 ||
4691          cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
4692              ref_total[GOLDEN_FRAME]) &&
4693         (ref_total[ALTREF_FRAME] == 0 ||
4694          cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
4695              ref_total[ALTREF_FRAME]))
4696       mask |= 1 << ifilter;
4697   }
4698   return mask;
4699 }
4700
4701 #ifdef ENABLE_KF_DENOISE
4702 // Baseline kernel weights for denoise
4703 static uint8_t dn_kernal_3[9] = { 1, 2, 1, 2, 4, 2, 1, 2, 1 };
4704 static uint8_t dn_kernal_5[25] = { 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 4,
4705                                    2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1 };
4706
4707 static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
4708                                      uint8_t point_weight, int *sum_val,
4709                                      int *sum_weight) {
4710   if (abs(centre_val - data_val) <= thresh) {
4711     *sum_weight += point_weight;
4712     *sum_val += (int)data_val * (int)point_weight;
4713   }
4714 }
4715
4716 static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
4717                                   const int strength) {
4718   int sum_weight = 0;
4719   int sum_val = 0;
4720   int thresh = strength;
4721   int kernal_size = 5;
4722   int half_k_size = 2;
4723   int i, j;
4724   int max_diff = 0;
4725   uint8_t *tmp_ptr;
4726   uint8_t *kernal_ptr;
4727
4728   // Find the maximum deviation from the source point in the locale.
4729   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4730   for (i = 0; i < kernal_size + 2; ++i) {
4731     for (j = 0; j < kernal_size + 2; ++j) {
4732       max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
4733     }
4734     tmp_ptr += stride;
4735   }
4736
4737   // Select the kernel size.
4738   if (max_diff > (strength + (strength >> 1))) {
4739     kernal_size = 3;
4740     half_k_size = 1;
4741     thresh = thresh >> 1;
4742   }
4743   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4744
4745   // Apply the kernel
4746   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4747   for (i = 0; i < kernal_size; ++i) {
4748     for (j = 0; j < kernal_size; ++j) {
4749       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4750                         &sum_val, &sum_weight);
4751       ++kernal_ptr;
4752     }
4753     tmp_ptr += stride;
4754   }
4755
4756   // Update the source value with the new filtered value
4757   *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4758 }
4759
4760 #if CONFIG_VP9_HIGHBITDEPTH
4761 static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
4762                                          const int strength) {
4763   int sum_weight = 0;
4764   int sum_val = 0;
4765   int thresh = strength;
4766   int kernal_size = 5;
4767   int half_k_size = 2;
4768   int i, j;
4769   int max_diff = 0;
4770   uint16_t *tmp_ptr;
4771   uint8_t *kernal_ptr;
4772
4773   // Find the maximum deviation from the source point in the locale.
4774   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4775   for (i = 0; i < kernal_size + 2; ++i) {
4776     for (j = 0; j < kernal_size + 2; ++j) {
4777       max_diff = VPXMAX(max_diff, abs((int)src_ptr - (int)tmp_ptr[j]));
4778     }
4779     tmp_ptr += stride;
4780   }
4781
4782   // Select the kernel size.
4783   if (max_diff > (strength + (strength >> 1))) {
4784     kernal_size = 3;
4785     half_k_size = 1;
4786     thresh = thresh >> 1;
4787   }
4788   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4789
4790   // Apply the kernel
4791   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4792   for (i = 0; i < kernal_size; ++i) {
4793     for (j = 0; j < kernal_size; ++j) {
4794       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4795                         &sum_val, &sum_weight);
4796       ++kernal_ptr;
4797     }
4798     tmp_ptr += stride;
4799   }
4800
4801   // Update the source value with the new filtered value
4802   *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4803 }
4804 #endif  // CONFIG_VP9_HIGHBITDEPTH
4805
4806 // Apply thresholded spatial noise suppression to a given buffer.
4807 static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
4808                                    const int stride, const int width,
4809                                    const int height, const int strength) {
4810   VP9_COMMON *const cm = &cpi->common;
4811   uint8_t *src_ptr = buffer;
4812   int row;
4813   int col;
4814
4815   for (row = 0; row < height; ++row) {
4816     for (col = 0; col < width; ++col) {
4817 #if CONFIG_VP9_HIGHBITDEPTH
4818       if (cm->use_highbitdepth)
4819         highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]), stride,
4820                                      strength);
4821       else
4822         spatial_denoise_point(&src_ptr[col], stride, strength);
4823 #else
4824       spatial_denoise_point(&src_ptr[col], stride, strength);
4825 #endif  // CONFIG_VP9_HIGHBITDEPTH
4826     }
4827     src_ptr += stride;
4828   }
4829 }
4830
4831 // Apply thresholded spatial noise suppression to source.
4832 static void spatial_denoise_frame(VP9_COMP *cpi) {
4833   YV12_BUFFER_CONFIG *src = cpi->Source;
4834   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4835   TWO_PASS *const twopass = &cpi->twopass;
4836   VP9_COMMON *const cm = &cpi->common;
4837
4838   // Base the filter strength on the current active max Q.
4839   const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
4840                                               cm->bit_depth));
4841   int strength =
4842       VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));
4843
4844   // Denoise each of Y,U and V buffers.
4845   spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
4846                          src->y_height, strength);
4847
4848   strength += (strength >> 1);
4849   spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
4850                          src->uv_height, strength << 1);
4851
4852   spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
4853                          src->uv_height, strength << 1);
4854 }
4855 #endif  // ENABLE_KF_DENOISE
4856
4857 #if !CONFIG_REALTIME_ONLY
4858 static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
4859                                          uint8_t *dest) {
4860   if (cpi->common.seg.enabled)
4861     if (ALT_REF_AQ_PROTECT_GAIN) {
4862       size_t nsize = *size;
4863       int overhead;
4864
4865       // TODO(yuryg): optimize this, as
4866       // we don't really need to repack
4867
4868       save_coding_context(cpi);
4869       vp9_disable_segmentation(&cpi->common.seg);
4870       vp9_pack_bitstream(cpi, dest, &nsize);
4871       restore_coding_context(cpi);
4872
4873       overhead = (int)*size - (int)nsize;
4874
4875       if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
4876         vp9_encode_frame(cpi);
4877       else
4878         vp9_enable_segmentation(&cpi->common.seg);
4879     }
4880 }
4881 #endif
4882
4883 static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
4884   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4885
4886   if (ref_buffer) {
4887     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
4888     ref_buffer->frame_index =
4889         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
4890 #if CONFIG_RATE_CTRL
4891     ref_buffer->frame_coding_index = cm->current_frame_coding_index;
4892 #endif  // CONFIG_RATE_CTRL
4893   }
4894 }
4895
4896 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
4897   VP9_COMMON *cm = &cpi->common;
4898   ThreadData *td = &cpi->td;
4899   MACROBLOCK *x = &td->mb;
4900   MACROBLOCKD *xd = &x->e_mbd;
4901   uint8_t *y_buffer = cpi->Source->y_buffer;
4902   const int y_stride = cpi->Source->y_stride;
4903   const int block_size = BLOCK_16X16;
4904
4905   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
4906   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
4907   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
4908   const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
4909   double log_sum = 0.0;
4910   int row, col;
4911
4912   // Loop through each 64x64 block.
4913   for (row = 0; row < num_rows; ++row) {
4914     for (col = 0; col < num_cols; ++col) {
4915       int mi_row, mi_col;
4916       double var = 0.0, num_of_var = 0.0;
4917       const int index = row * num_cols + col;
4918
4919       for (mi_row = row * num_8x8_h;
4920            mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
4921         for (mi_col = col * num_8x8_w;
4922              mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w; ++mi_col) {
4923           struct buf_2d buf;
4924           const int row_offset_y = mi_row << 3;
4925           const int col_offset_y = mi_col << 3;
4926
4927           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
4928           buf.stride = y_stride;
4929
4930           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
4931           // and high bit videos, the variance needs to be divided by 2.0 or
4932           // 64.0 separately.
4933           // TODO(sdeng): need to tune for 12bit videos.
4934 #if CONFIG_VP9_HIGHBITDEPTH
4935           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
4936             var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
4937           else
4938 #endif
4939             var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
4940
4941           num_of_var += 1.0;
4942         }
4943       }
4944       var = var / num_of_var / 64.0;
4945
4946       // Curve fitting with an exponential model on all 16x16 blocks from the
4947       // Midres dataset.
4948       var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
4949       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
4950       log_sum += log(var);
4951     }
4952   }
4953   log_sum = exp(log_sum / (double)(num_rows * num_cols));
4954
4955   for (row = 0; row < num_rows; ++row) {
4956     for (col = 0; col < num_cols; ++col) {
4957       const int index = row * num_cols + col;
4958       cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
4959     }
4960   }
4961
4962   (void)xd;
4963 }
4964
4965 // Process the wiener variance in 16x16 block basis.
4966 static int qsort_comp(const void *elem1, const void *elem2) {
4967   int a = *((const int *)elem1);
4968   int b = *((const int *)elem2);
4969   if (a > b) return 1;
4970   if (a < b) return -1;
4971   return 0;
4972 }
4973
4974 static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
4975   VP9_COMMON *cm = &cpi->common;
4976
4977   if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows &&
4978       cpi->mb_wiener_var_cols >= cm->mb_cols)
4979     return;
4980
4981   vpx_free(cpi->mb_wiener_variance);
4982   cpi->mb_wiener_variance = NULL;
4983
4984   CHECK_MEM_ERROR(
4985       cm, cpi->mb_wiener_variance,
4986       vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
4987   cpi->mb_wiener_var_rows = cm->mb_rows;
4988   cpi->mb_wiener_var_cols = cm->mb_cols;
4989 }
4990
4991 static void set_mb_wiener_variance(VP9_COMP *cpi) {
4992   VP9_COMMON *cm = &cpi->common;
4993   uint8_t *buffer = cpi->Source->y_buffer;
4994   int buf_stride = cpi->Source->y_stride;
4995
4996 #if CONFIG_VP9_HIGHBITDEPTH
4997   ThreadData *td = &cpi->td;
4998   MACROBLOCK *x = &td->mb;
4999   MACROBLOCKD *xd = &x->e_mbd;
5000   DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
5001   DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
5002   uint8_t *zero_pred;
5003 #else
5004   DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
5005 #endif
5006
5007   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
5008   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
5009
5010   int mb_row, mb_col, count = 0;
5011   // Hard coded operating block size
5012   const int block_size = 16;
5013   const int coeff_count = block_size * block_size;
5014   const TX_SIZE tx_size = TX_16X16;
5015
5016 #if CONFIG_VP9_HIGHBITDEPTH
5017   xd->cur_buf = cpi->Source;
5018   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5019     zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
5020     memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);
5021   } else {
5022     zero_pred = zero_pred8;
5023     memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);
5024   }
5025 #else
5026   memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
5027 #endif
5028
5029   cpi->norm_wiener_variance = 0;
5030
5031   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
5032     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
5033       int idx;
5034       int16_t median_val = 0;
5035       uint8_t *mb_buffer =
5036           buffer + mb_row * block_size * buf_stride + mb_col * block_size;
5037       int64_t wiener_variance = 0;
5038
5039 #if CONFIG_VP9_HIGHBITDEPTH
5040       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5041         vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
5042                                   mb_buffer, buf_stride, zero_pred, block_size,
5043                                   xd->bd);
5044         highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5045       } else {
5046         vpx_subtract_block(block_size, block_size, src_diff, block_size,
5047                            mb_buffer, buf_stride, zero_pred, block_size);
5048         wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5049       }
5050 #else
5051       vpx_subtract_block(block_size, block_size, src_diff, block_size,
5052                          mb_buffer, buf_stride, zero_pred, block_size);
5053       wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5054 #endif  // CONFIG_VP9_HIGHBITDEPTH
5055
5056       coeff[0] = 0;
5057       for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]);
5058
5059       qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp);
5060
5061       // Noise level estimation
5062       median_val = coeff[coeff_count / 2];
5063
5064       // Wiener filter
5065       for (idx = 1; idx < coeff_count; ++idx) {
5066         int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
5067         int64_t tmp_coeff = (int64_t)coeff[idx];
5068         if (median_val) {
5069           tmp_coeff = (sqr_coeff * coeff[idx]) /
5070                       (sqr_coeff + (int64_t)median_val * median_val);
5071         }
5072         wiener_variance += tmp_coeff * tmp_coeff;
5073       }
5074       cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
5075           wiener_variance / coeff_count;
5076       cpi->norm_wiener_variance +=
5077           cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col];
5078       ++count;
5079     }
5080   }
5081
5082   if (count) cpi->norm_wiener_variance /= count;
5083   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
5084 }
5085
5086 #if !CONFIG_REALTIME_ONLY
5087 static void update_encode_frame_result(
5088     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
5089     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
5090     RefCntBuffer *ref_frame_buf[MAX_INTER_REF_FRAMES], int quantize_index,
5091     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
5092 #if CONFIG_RATE_CTRL
5093     const PARTITION_INFO *partition_info,
5094     const MOTION_VECTOR_INFO *motion_vector_info,
5095 #endif  // CONFIG_RATE_CTRL
5096     ENCODE_FRAME_RESULT *encode_frame_result);
5097 #endif  // !CONFIG_REALTIME_ONLY
5098
5099 static void encode_frame_to_data_rate(
5100     VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags,
5101     ENCODE_FRAME_RESULT *encode_frame_result) {
5102   VP9_COMMON *const cm = &cpi->common;
5103   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5104   struct segmentation *const seg = &cm->seg;
5105   TX_SIZE t;
5106
5107   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
5108   // No need to set svc.skip_enhancement_layer if whole superframe will be
5109   // dropped.
5110   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
5111       cpi->oxcf.target_bandwidth == 0 &&
5112       !(cpi->svc.framedrop_mode != LAYER_DROP &&
5113         (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
5114          cpi->svc
5115              .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
5116                                                 1]) &&
5117         cpi->svc.drop_spatial_layer[0])) {
5118     cpi->svc.skip_enhancement_layer = 1;
5119     vp9_rc_postencode_update_drop_frame(cpi);
5120     cpi->ext_refresh_frame_flags_pending = 0;
5121     cpi->last_frame_dropped = 1;
5122     cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
5123     cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
5124     vp9_inc_frame_in_layer(cpi);
5125     return;
5126   }
5127
5128   set_ext_overrides(cpi);
5129   vpx_clear_system_state();
5130
5131 #ifdef ENABLE_KF_DENOISE
5132   // Spatial denoise of key frame.
5133   if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
5134 #endif
5135
5136   if (cm->show_existing_frame == 0) {
5137     // Update frame index
5138     set_frame_index(cpi, cm);
5139
5140     // Set the arf sign bias for this frame.
5141     set_ref_sign_bias(cpi);
5142   }
5143
5144   // Set default state for segment based loop filter update flags.
5145   cm->lf.mode_ref_delta_update = 0;
5146
5147   if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
5148     cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);
5149
5150   // Set various flags etc to special state if it is a key frame.
5151   if (frame_is_intra_only(cm)) {
5152     // Reset the loop filter deltas and segmentation map.
5153     vp9_reset_segment_features(&cm->seg);
5154
5155     // If segmentation is enabled force a map update for key frames.
5156     if (seg->enabled) {
5157       seg->update_map = 1;
5158       seg->update_data = 1;
5159     }
5160
5161     // The alternate reference frame cannot be active for a key frame.
5162     cpi->rc.source_alt_ref_active = 0;
5163
5164     cm->error_resilient_mode = oxcf->error_resilient_mode;
5165     cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
5166
5167     // By default, encoder assumes decoder can use prev_mi.
5168     if (cm->error_resilient_mode) {
5169       cm->frame_parallel_decoding_mode = 1;
5170       cm->reset_frame_context = 0;
5171       cm->refresh_frame_context = 0;
5172     } else if (cm->intra_only) {
5173       // Only reset the current context.
5174       cm->reset_frame_context = 2;
5175     }
5176   }
5177
5178   if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);
5179
5180   if (oxcf->aq_mode == PERCEPTUAL_AQ) {
5181     init_mb_wiener_var_buffer(cpi);
5182     set_mb_wiener_variance(cpi);
5183   }
5184
5185   vpx_clear_system_state();
5186
5187 #if CONFIG_INTERNAL_STATS
5188   memset(cpi->mode_chosen_counts, 0,
5189          MAX_MODES * sizeof(*cpi->mode_chosen_counts));
5190 #endif
5191 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5192   // Backup to ensure consistency between recodes
5193   save_encode_params(cpi);
5194 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5195
5196   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
5197     if (!encode_without_recode_loop(cpi, size, dest)) return;
5198   } else {
5199 #if !CONFIG_REALTIME_ONLY
5200     encode_with_recode_loop(cpi, size, dest);
5201 #endif
5202   }
5203
5204   // TODO(jingning): When using show existing frame mode, we assume that the
5205   // current ARF will be directly used as the final reconstructed frame. This is
5206   // an encoder control scheme. One could in principle explore other
5207   // possibilities to arrange the reference frame buffer and their coding order.
5208   if (cm->show_existing_frame) {
5209     ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
5210                cm->ref_frame_map[cpi->alt_fb_idx]);
5211   }
5212
5213 #if !CONFIG_REALTIME_ONLY
5214   // Disable segmentation if it decrease rate/distortion ratio
5215   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5216     vp9_try_disable_lookahead_aq(cpi, size, dest);
5217 #endif
5218
5219 #if CONFIG_VP9_TEMPORAL_DENOISING
5220 #ifdef OUTPUT_YUV_DENOISED
5221   if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
5222     vpx_write_yuv_frame(yuv_denoised_file,
5223                         &cpi->denoiser.running_avg_y[INTRA_FRAME]);
5224   }
5225 #endif
5226 #endif
5227 #ifdef OUTPUT_YUV_SKINMAP
5228   if (cpi->common.current_video_frame > 1) {
5229     vp9_output_skin_map(cpi, yuv_skinmap_file);
5230   }
5231 #endif
5232
5233   // Special case code to reduce pulsing when key frames are forced at a
5234   // fixed interval. Note the reconstruction error if it is the frame before
5235   // the force key frame
5236   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
5237 #if CONFIG_VP9_HIGHBITDEPTH
5238     if (cm->use_highbitdepth) {
5239       cpi->ambient_err =
5240           vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5241     } else {
5242       cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5243     }
5244 #else
5245     cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5246 #endif  // CONFIG_VP9_HIGHBITDEPTH
5247   }
5248
5249   // If the encoder forced a KEY_FRAME decision
5250   if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;
5251
5252   cm->frame_to_show = get_frame_new_buffer(cm);
5253   cm->frame_to_show->color_space = cm->color_space;
5254   cm->frame_to_show->color_range = cm->color_range;
5255   cm->frame_to_show->render_width = cm->render_width;
5256   cm->frame_to_show->render_height = cm->render_height;
5257
5258   // Pick the loop filter level for the frame.
5259   loopfilter_frame(cpi, cm);
5260
5261   if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
5262
5263   // build the bitstream
5264   vp9_pack_bitstream(cpi, dest, size);
5265
5266 #if CONFIG_REALTIME_ONLY
5267   (void)encode_frame_result;
5268   assert(encode_frame_result == NULL);
5269 #else  // CONFIG_REALTIME_ONLY
5270   if (encode_frame_result != NULL) {
5271     const int ref_frame_flags = get_ref_frame_flags(cpi);
5272     const RefCntBuffer *coded_frame_buf =
5273         get_ref_cnt_buffer(cm, cm->new_fb_idx);
5274     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
5275     get_ref_frame_bufs(cpi, ref_frame_bufs);
5276     // update_encode_frame_result() depends on twopass.gf_group.index and
5277     // cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and
5278     // cpi->alt_fb_idx are updated for current frame and have
5279     // not been updated for the next frame yet.
5280     // The update locations are as follows.
5281     // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
5282     // for the first frame in the gf_group and is updated for the next frame at
5283     // vp9_twopass_postencode_update().
5284     // 2) cpi->Source is updated at the beginning of vp9_get_compressed_data()
5285     // 3) cm->new_fb_idx is updated at the beginning of
5286     // vp9_get_compressed_data() by get_free_fb(cm).
5287     // 4) cpi->lst_fb_idx/gld_fb_idx/alt_fb_idx will be updated for the next
5288     // frame at vp9_update_reference_frames().
5289     // This function needs to be called before vp9_update_reference_frames().
5290     // TODO(angiebird): Improve the codebase to make the update of frame
5291     // dependent variables more robust.
5292     update_encode_frame_result(
5293         ref_frame_flags,
5294         cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
5295         cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),
5296         cpi->oxcf.input_bit_depth, cm->bit_depth, cpi->td.counts,
5297 #if CONFIG_RATE_CTRL
5298         cpi->partition_info, cpi->motion_vector_info,
5299 #endif  // CONFIG_RATE_CTRL
5300         encode_frame_result);
5301   }
5302 #endif  // CONFIG_REALTIME_ONLY
5303
5304   if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
5305       cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
5306     restore_coding_context(cpi);
5307     return;
5308   }
5309
5310   cpi->last_frame_dropped = 0;
5311   cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
5312   if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
5313     cpi->svc.num_encoded_top_layer++;
5314
5315   // Keep track of the frame buffer index updated/refreshed for the
5316   // current encoded TL0 superframe.
5317   if (cpi->svc.temporal_layer_id == 0) {
5318     if (cpi->refresh_last_frame)
5319       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
5320     else if (cpi->refresh_golden_frame)
5321       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
5322     else if (cpi->refresh_alt_ref_frame)
5323       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
5324   }
5325
5326   if (cm->seg.update_map) update_reference_segmentation_map(cpi);
5327
5328   if (frame_is_intra_only(cm) == 0) {
5329     release_scaled_references(cpi);
5330   }
5331   vp9_update_reference_frames(cpi);
5332
5333   if (!cm->show_existing_frame) {
5334     for (t = TX_4X4; t <= TX_32X32; ++t) {
5335       full_to_model_counts(cpi->td.counts->coef[t],
5336                            cpi->td.rd_counts.coef_counts[t]);
5337     }
5338
5339     if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
5340       if (!frame_is_intra_only(cm)) {
5341         vp9_adapt_mode_probs(cm);
5342         vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
5343       }
5344       vp9_adapt_coef_probs(cm);
5345     }
5346   }
5347
5348   cpi->ext_refresh_frame_flags_pending = 0;
5349
5350   if (cpi->refresh_golden_frame == 1)
5351     cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
5352   else
5353     cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
5354
5355   if (cpi->refresh_alt_ref_frame == 1)
5356     cpi->frame_flags |= FRAMEFLAGS_ALTREF;
5357   else
5358     cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
5359
5360   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
5361
5362   cm->last_frame_type = cm->frame_type;
5363
5364   vp9_rc_postencode_update(cpi, *size);
5365
5366   *size = VPXMAX(1, *size);
5367
5368 #if 0
5369   output_frame_level_debug_stats(cpi);
5370 #endif
5371
5372   if (cm->frame_type == KEY_FRAME) {
5373     // Tell the caller that the frame was coded as a key frame
5374     *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
5375   } else {
5376     *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
5377   }
5378
5379   // Clear the one shot update flags for segmentation map and mode/ref loop
5380   // filter deltas.
5381   cm->seg.update_map = 0;
5382   cm->seg.update_data = 0;
5383   cm->lf.mode_ref_delta_update = 0;
5384
5385   // keep track of the last coded dimensions
5386   cm->last_width = cm->width;
5387   cm->last_height = cm->height;
5388
5389   // reset to normal state now that we are done.
5390   if (!cm->show_existing_frame) {
5391     cm->last_show_frame = cm->show_frame;
5392     cm->prev_frame = cm->cur_frame;
5393   }
5394
5395   if (cm->show_frame) {
5396     vp9_swap_mi_and_prev_mi(cm);
5397     // Don't increment frame counters if this was an altref buffer
5398     // update not a real frame
5399     update_frame_indexes(cm, cm->show_frame);
5400     if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
5401   }
5402
5403   if (cpi->use_svc) {
5404     cpi->svc
5405         .layer_context[cpi->svc.spatial_layer_id *
5406                            cpi->svc.number_temporal_layers +
5407                        cpi->svc.temporal_layer_id]
5408         .last_frame_type = cm->frame_type;
5409     // Reset layer_sync back to 0 for next frame.
5410     cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
5411   }
5412
5413   cpi->force_update_segmentation = 0;
5414
5415 #if !CONFIG_REALTIME_ONLY
5416   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5417     vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
5418 #endif
5419
5420   cpi->svc.previous_frame_is_intra_only = cm->intra_only;
5421   cpi->svc.set_intra_only_frame = 0;
5422 }
5423
5424 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5425                       unsigned int *frame_flags) {
5426   vp9_rc_get_svc_params(cpi);
5427   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5428                             /*encode_frame_result = */ NULL);
5429 }
5430
5431 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5432                         unsigned int *frame_flags) {
5433   if (cpi->oxcf.rc_mode == VPX_CBR) {
5434     vp9_rc_get_one_pass_cbr_params(cpi);
5435   } else {
5436     vp9_rc_get_one_pass_vbr_params(cpi);
5437   }
5438   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5439                             /*encode_frame_result = */ NULL);
5440 }
5441
5442 #if !CONFIG_REALTIME_ONLY
5443 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5444                         unsigned int *frame_flags,
5445                         ENCODE_FRAME_RESULT *encode_frame_result) {
5446   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
5447 #if CONFIG_MISMATCH_DEBUG
5448   mismatch_move_frame_idx_w();
5449 #endif
5450   encode_frame_to_data_rate(cpi, size, dest, frame_flags, encode_frame_result);
5451 }
5452 #endif  // !CONFIG_REALTIME_ONLY
5453
5454 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
5455                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
5456                           int64_t end_time) {
5457   VP9_COMMON *const cm = &cpi->common;
5458   struct vpx_usec_timer timer;
5459   int res = 0;
5460   const int subsampling_x = sd->subsampling_x;
5461   const int subsampling_y = sd->subsampling_y;
5462 #if CONFIG_VP9_HIGHBITDEPTH
5463   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
5464 #else
5465   const int use_highbitdepth = 0;
5466 #endif
5467
5468   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
5469 #if CONFIG_VP9_TEMPORAL_DENOISING
5470   setup_denoiser_buffer(cpi);
5471 #endif
5472
5473   alloc_raw_frame_buffers(cpi);
5474
5475   vpx_usec_timer_start(&timer);
5476
5477   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
5478                          use_highbitdepth, frame_flags))
5479     res = -1;
5480   vpx_usec_timer_mark(&timer);
5481   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
5482
5483   if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
5484       (subsampling_x != 1 || subsampling_y != 1)) {
5485     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5486                        "Non-4:2:0 color format requires profile 1 or 3");
5487     res = -1;
5488   }
5489   if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
5490       (subsampling_x == 1 && subsampling_y == 1)) {
5491     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5492                        "4:2:0 color format requires profile 0 or 2");
5493     res = -1;
5494   }
5495
5496   return res;
5497 }
5498
5499 static int frame_is_reference(const VP9_COMP *cpi) {
5500   const VP9_COMMON *cm = &cpi->common;
5501
5502   return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
5503          cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame ||
5504          cm->refresh_frame_context || cm->lf.mode_ref_delta_update ||
5505          cm->seg.update_map || cm->seg.update_data;
5506 }
5507
5508 static void adjust_frame_rate(VP9_COMP *cpi,
5509                               const struct lookahead_entry *source) {
5510   int64_t this_duration;
5511   int step = 0;
5512
5513   if (source->ts_start == cpi->first_time_stamp_ever) {
5514     this_duration = source->ts_end - source->ts_start;
5515     step = 1;
5516   } else {
5517     int64_t last_duration =
5518         cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
5519
5520     this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
5521
5522     // do a step update if the duration changes by 10%
5523     if (last_duration)
5524       step = (int)((this_duration - last_duration) * 10 / last_duration);
5525   }
5526
5527   if (this_duration) {
5528     if (step) {
5529       vp9_new_framerate(cpi, 10000000.0 / this_duration);
5530     } else {
5531       // Average this frame's rate into the last second's average
5532       // frame rate. If we haven't seen 1 second yet, then average
5533       // over the whole interval seen.
5534       const double interval = VPXMIN(
5535           (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
5536       double avg_duration = 10000000.0 / cpi->framerate;
5537       avg_duration *= (interval - avg_duration + this_duration);
5538       avg_duration /= interval;
5539
5540       vp9_new_framerate(cpi, 10000000.0 / avg_duration);
5541     }
5542   }
5543   cpi->last_time_stamp_seen = source->ts_start;
5544   cpi->last_end_time_stamp_seen = source->ts_end;
5545 }
5546
5547 // Returns 0 if this is not an alt ref else the offset of the source frame
5548 // used as the arf midpoint.
5549 static int get_arf_src_index(VP9_COMP *cpi) {
5550   RATE_CONTROL *const rc = &cpi->rc;
5551   int arf_src_index = 0;
5552   if (is_altref_enabled(cpi)) {
5553     if (cpi->oxcf.pass == 2) {
5554       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5555       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
5556         arf_src_index = gf_group->arf_src_offset[gf_group->index];
5557       }
5558     } else if (rc->source_alt_ref_pending) {
5559       arf_src_index = rc->frames_till_gf_update_due;
5560     }
5561   }
5562   return arf_src_index;
5563 }
5564
5565 static void check_src_altref(VP9_COMP *cpi,
5566                              const struct lookahead_entry *source) {
5567   RATE_CONTROL *const rc = &cpi->rc;
5568
5569   if (cpi->oxcf.pass == 2) {
5570     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5571     rc->is_src_frame_alt_ref =
5572         (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
5573   } else {
5574     rc->is_src_frame_alt_ref =
5575         cpi->alt_ref_source && (source == cpi->alt_ref_source);
5576   }
5577
5578   if (rc->is_src_frame_alt_ref) {
5579     // Current frame is an ARF overlay frame.
5580     cpi->alt_ref_source = NULL;
5581
5582     // Don't refresh the last buffer for an ARF overlay frame. It will
5583     // become the GF so preserve last as an alternative prediction option.
5584     cpi->refresh_last_frame = 0;
5585   }
5586 }
5587
5588 #if CONFIG_INTERNAL_STATS
5589 static void adjust_image_stat(double y, double u, double v, double all,
5590                               ImageStat *s) {
5591   s->stat[Y] += y;
5592   s->stat[U] += u;
5593   s->stat[V] += v;
5594   s->stat[ALL] += all;
5595   s->worst = VPXMIN(s->worst, all);
5596 }
5597 #endif  // CONFIG_INTERNAL_STATS
5598
5599 // Adjust the maximum allowable frame size for the target level.
5600 static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) {
5601   RATE_CONTROL *const rc = &cpi->rc;
5602   LevelConstraint *const ls = &cpi->level_constraint;
5603   VP9_COMMON *const cm = &cpi->common;
5604   const double max_cpb_size = ls->max_cpb_size;
5605   vpx_clear_system_state();
5606   rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size);
5607   if (frame_is_intra_only(cm)) {
5608     rc->max_frame_bandwidth =
5609         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5));
5610   } else if (arf_src_index > 0) {
5611     rc->max_frame_bandwidth =
5612         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4));
5613   } else {
5614     rc->max_frame_bandwidth =
5615         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2));
5616   }
5617 }
5618
5619 static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
5620   VP9_COMMON *const cm = &cpi->common;
5621   Vp9LevelInfo *const level_info = &cpi->level_info;
5622   Vp9LevelSpec *const level_spec = &level_info->level_spec;
5623   Vp9LevelStats *const level_stats = &level_info->level_stats;
5624   int i, idx;
5625   uint64_t luma_samples, dur_end;
5626   const uint32_t luma_pic_size = cm->width * cm->height;
5627   const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
5628   LevelConstraint *const level_constraint = &cpi->level_constraint;
5629   const int8_t level_index = level_constraint->level_index;
5630   double cpb_data_size;
5631
5632   vpx_clear_system_state();
5633
5634   // update level_stats
5635   level_stats->total_compressed_size += *size;
5636   if (cm->show_frame) {
5637     level_stats->total_uncompressed_size +=
5638         luma_pic_size +
5639         2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
5640     level_stats->time_encoded =
5641         (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
5642         (double)TICKS_PER_SEC;
5643   }
5644
5645   if (arf_src_index > 0) {
5646     if (!level_stats->seen_first_altref) {
5647       level_stats->seen_first_altref = 1;
5648     } else if (level_stats->frames_since_last_altref <
5649                level_spec->min_altref_distance) {
5650       level_spec->min_altref_distance = level_stats->frames_since_last_altref;
5651     }
5652     level_stats->frames_since_last_altref = 0;
5653   } else {
5654     ++level_stats->frames_since_last_altref;
5655   }
5656
5657   if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
5658     idx = (level_stats->frame_window_buffer.start +
5659            level_stats->frame_window_buffer.len++) %
5660           FRAME_WINDOW_SIZE;
5661   } else {
5662     idx = level_stats->frame_window_buffer.start;
5663     level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
5664   }
5665   level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
5666   level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
5667   level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
5668
5669   if (cm->frame_type == KEY_FRAME) {
5670     level_stats->ref_refresh_map = 0;
5671   } else {
5672     int count = 0;
5673     level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
5674     // Also need to consider the case where the encoder refers to a buffer
5675     // that has been implicitly refreshed after encoding a keyframe.
5676     if (!cm->intra_only) {
5677       level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
5678       level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
5679       level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
5680     }
5681     for (i = 0; i < REF_FRAMES; ++i) {
5682       count += (level_stats->ref_refresh_map >> i) & 1;
5683     }
5684     if (count > level_spec->max_ref_frame_buffers) {
5685       level_spec->max_ref_frame_buffers = count;
5686     }
5687   }
5688
5689   // update average_bitrate
5690   level_spec->average_bitrate = (double)level_stats->total_compressed_size /
5691                                 125.0 / level_stats->time_encoded;
5692
5693   // update max_luma_sample_rate
5694   luma_samples = 0;
5695   for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
5696     idx = (level_stats->frame_window_buffer.start +
5697            level_stats->frame_window_buffer.len - 1 - i) %
5698           FRAME_WINDOW_SIZE;
5699     if (i == 0) {
5700       dur_end = level_stats->frame_window_buffer.buf[idx].ts;
5701     }
5702     if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
5703         TICKS_PER_SEC) {
5704       break;
5705     }
5706     luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
5707   }
5708   if (luma_samples > level_spec->max_luma_sample_rate) {
5709     level_spec->max_luma_sample_rate = luma_samples;
5710   }
5711
5712   // update max_cpb_size
5713   cpb_data_size = 0;
5714   for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
5715     if (i >= level_stats->frame_window_buffer.len) break;
5716     idx = (level_stats->frame_window_buffer.start +
5717            level_stats->frame_window_buffer.len - 1 - i) %
5718           FRAME_WINDOW_SIZE;
5719     cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5720   }
5721   cpb_data_size = cpb_data_size / 125.0;
5722   if (cpb_data_size > level_spec->max_cpb_size) {
5723     level_spec->max_cpb_size = cpb_data_size;
5724   }
5725
5726   // update max_luma_picture_size
5727   if (luma_pic_size > level_spec->max_luma_picture_size) {
5728     level_spec->max_luma_picture_size = luma_pic_size;
5729   }
5730
5731   // update max_luma_picture_breadth
5732   if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
5733     level_spec->max_luma_picture_breadth = luma_pic_breadth;
5734   }
5735
5736   // update compression_ratio
5737   level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
5738                                   cm->bit_depth /
5739                                   level_stats->total_compressed_size / 8.0;
5740
5741   // update max_col_tiles
5742   if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
5743     level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
5744   }
5745
5746   if (level_index >= 0 && level_constraint->fail_flag == 0) {
5747     if (level_spec->max_luma_picture_size >
5748         vp9_level_defs[level_index].max_luma_picture_size) {
5749       level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
5750       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5751                          "Failed to encode to the target level %d. %s",
5752                          vp9_level_defs[level_index].level,
5753                          level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
5754     }
5755
5756     if (level_spec->max_luma_picture_breadth >
5757         vp9_level_defs[level_index].max_luma_picture_breadth) {
5758       level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
5759       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5760                          "Failed to encode to the target level %d. %s",
5761                          vp9_level_defs[level_index].level,
5762                          level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
5763     }
5764
5765     if ((double)level_spec->max_luma_sample_rate >
5766         (double)vp9_level_defs[level_index].max_luma_sample_rate *
5767             (1 + SAMPLE_RATE_GRACE_P)) {
5768       level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
5769       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5770                          "Failed to encode to the target level %d. %s",
5771                          vp9_level_defs[level_index].level,
5772                          level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
5773     }
5774
5775     if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
5776       level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
5777       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5778                          "Failed to encode to the target level %d. %s",
5779                          vp9_level_defs[level_index].level,
5780                          level_fail_messages[TOO_MANY_COLUMN_TILE]);
5781     }
5782
5783     if (level_spec->min_altref_distance <
5784         vp9_level_defs[level_index].min_altref_distance) {
5785       level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
5786       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5787                          "Failed to encode to the target level %d. %s",
5788                          vp9_level_defs[level_index].level,
5789                          level_fail_messages[ALTREF_DIST_TOO_SMALL]);
5790     }
5791
5792     if (level_spec->max_ref_frame_buffers >
5793         vp9_level_defs[level_index].max_ref_frame_buffers) {
5794       level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
5795       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5796                          "Failed to encode to the target level %d. %s",
5797                          vp9_level_defs[level_index].level,
5798                          level_fail_messages[TOO_MANY_REF_BUFFER]);
5799     }
5800
5801     if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
5802       level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
5803       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5804                          "Failed to encode to the target level %d. %s",
5805                          vp9_level_defs[level_index].level,
5806                          level_fail_messages[CPB_TOO_LARGE]);
5807     }
5808
5809     // Set an upper bound for the next frame size. It will be used in
5810     // level_rc_framerate() before encoding the next frame.
5811     cpb_data_size = 0;
5812     for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
5813       if (i >= level_stats->frame_window_buffer.len) break;
5814       idx = (level_stats->frame_window_buffer.start +
5815              level_stats->frame_window_buffer.len - 1 - i) %
5816             FRAME_WINDOW_SIZE;
5817       cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5818     }
5819     cpb_data_size = cpb_data_size / 125.0;
5820     level_constraint->max_frame_size =
5821         (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
5822               1000.0);
5823     if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
5824       level_constraint->max_frame_size >>= 1;
5825   }
5826 }
5827
5828 typedef struct GF_PICTURE {
5829   YV12_BUFFER_CONFIG *frame;
5830   int ref_frame[3];
5831   FRAME_UPDATE_TYPE update_type;
5832 } GF_PICTURE;
5833
5834 static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
5835                             const GF_GROUP *gf_group, int *tpl_group_frames) {
5836   VP9_COMMON *cm = &cpi->common;
5837   int frame_idx = 0;
5838   int i;
5839   int gld_index = -1;
5840   int alt_index = -1;
5841   int lst_index = -1;
5842   int arf_index_stack[MAX_ARF_LAYERS];
5843   int arf_stack_size = 0;
5844   int extend_frame_count = 0;
5845   int pframe_qindex = cpi->tpl_stats[2].base_qindex;
5846   int frame_gop_offset = 0;
5847
5848   RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
5849   int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
5850
5851   memset(recon_frame_index, -1, sizeof(recon_frame_index));
5852   stack_init(arf_index_stack, MAX_ARF_LAYERS);
5853
5854   // TODO(jingning): To be used later for gf frame type parsing.
5855   (void)gf_group;
5856
5857   for (i = 0; i < FRAME_BUFFERS; ++i) {
5858     if (frame_bufs[i].ref_count == 0) {
5859       alloc_frame_mvs(cm, i);
5860       if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
5861                                    cm->subsampling_x, cm->subsampling_y,
5862 #if CONFIG_VP9_HIGHBITDEPTH
5863                                    cm->use_highbitdepth,
5864 #endif
5865                                    VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
5866                                    NULL, NULL, NULL))
5867         vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
5868                            "Failed to allocate frame buffer");
5869
5870       recon_frame_index[frame_idx] = i;
5871       ++frame_idx;
5872
5873       if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
5874     }
5875   }
5876
5877   for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
5878     assert(recon_frame_index[i] >= 0);
5879     cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
5880   }
5881
5882   *tpl_group_frames = 0;
5883
5884   // Initialize Golden reference frame.
5885   gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
5886   for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
5887   gf_picture[0].update_type = gf_group->update_type[0];
5888   gld_index = 0;
5889   ++*tpl_group_frames;
5890
5891   // Initialize base layer ARF frame
5892   gf_picture[1].frame = cpi->Source;
5893   gf_picture[1].ref_frame[0] = gld_index;
5894   gf_picture[1].ref_frame[1] = lst_index;
5895   gf_picture[1].ref_frame[2] = alt_index;
5896   gf_picture[1].update_type = gf_group->update_type[1];
5897   alt_index = 1;
5898   ++*tpl_group_frames;
5899
5900   // Initialize P frames
5901   for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
5902     struct lookahead_entry *buf;
5903     frame_gop_offset = gf_group->frame_gop_index[frame_idx];
5904     buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
5905
5906     if (buf == NULL) break;
5907
5908     gf_picture[frame_idx].frame = &buf->img;
5909     gf_picture[frame_idx].ref_frame[0] = gld_index;
5910     gf_picture[frame_idx].ref_frame[1] = lst_index;
5911     gf_picture[frame_idx].ref_frame[2] = alt_index;
5912     gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];
5913
5914     switch (gf_group->update_type[frame_idx]) {
5915       case ARF_UPDATE:
5916         stack_push(arf_index_stack, alt_index, arf_stack_size);
5917         ++arf_stack_size;
5918         alt_index = frame_idx;
5919         break;
5920       case LF_UPDATE: lst_index = frame_idx; break;
5921       case OVERLAY_UPDATE:
5922         gld_index = frame_idx;
5923         alt_index = stack_pop(arf_index_stack, arf_stack_size);
5924         --arf_stack_size;
5925         break;
5926       case USE_BUF_FRAME:
5927         lst_index = alt_index;
5928         alt_index = stack_pop(arf_index_stack, arf_stack_size);
5929         --arf_stack_size;
5930         break;
5931       default: break;
5932     }
5933
5934     ++*tpl_group_frames;
5935
5936     // The length of group of pictures is baseline_gf_interval, plus the
5937     // beginning golden frame from last GOP, plus the last overlay frame in
5938     // the same GOP.
5939     if (frame_idx == gf_group->gf_group_size) break;
5940   }
5941
5942   alt_index = -1;
5943   ++frame_idx;
5944   ++frame_gop_offset;
5945
5946   // Extend two frames outside the current gf group.
5947   for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
5948     struct lookahead_entry *buf =
5949         vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
5950
5951     if (buf == NULL) break;
5952
5953     cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
5954
5955     gf_picture[frame_idx].frame = &buf->img;
5956     gf_picture[frame_idx].ref_frame[0] = gld_index;
5957     gf_picture[frame_idx].ref_frame[1] = lst_index;
5958     gf_picture[frame_idx].ref_frame[2] = alt_index;
5959     gf_picture[frame_idx].update_type = LF_UPDATE;
5960     lst_index = frame_idx;
5961     ++*tpl_group_frames;
5962     ++extend_frame_count;
5963     ++frame_gop_offset;
5964   }
5965 }
5966
5967 static void init_tpl_stats(VP9_COMP *cpi) {
5968   int frame_idx;
5969   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
5970     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
5971     memset(tpl_frame->tpl_stats_ptr, 0,
5972            tpl_frame->height * tpl_frame->width *
5973                sizeof(*tpl_frame->tpl_stats_ptr));
5974     tpl_frame->is_valid = 0;
5975   }
5976 }
5977
5978 #if CONFIG_NON_GREEDY_MV
5979 static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
5980                                          MotionField *motion_field,
5981                                          int frame_idx, uint8_t *cur_frame_buf,
5982                                          uint8_t *ref_frame_buf, int stride,
5983                                          BLOCK_SIZE bsize, int mi_row,
5984                                          int mi_col, MV *mv) {
5985   MACROBLOCK *const x = &td->mb;
5986   MACROBLOCKD *const xd = &x->e_mbd;
5987   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5988   int step_param;
5989   uint32_t bestsme = UINT_MAX;
5990   const MvLimits tmp_mv_limits = x->mv_limits;
5991   // lambda is used to adjust the importance of motion vector consistency.
5992   // TODO(angiebird): Figure out lambda's proper value.
5993   const int lambda = cpi->tpl_stats[frame_idx].lambda;
5994   int_mv nb_full_mvs[NB_MVS_NUM];
5995   int nb_full_mv_num;
5996
5997   MV best_ref_mv1 = { 0, 0 };
5998   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
5999
6000   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6001   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6002
6003   // Setup frame pointers
6004   x->plane[0].src.buf = cur_frame_buf;
6005   x->plane[0].src.stride = stride;
6006   xd->plane[0].pre[0].buf = ref_frame_buf;
6007   xd->plane[0].pre[0].stride = stride;
6008
6009   step_param = mv_sf->reduce_first_step_size;
6010   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6011
6012   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6013
6014   nb_full_mv_num =
6015       vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
6016   vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
6017                              lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
6018
6019   /* restore UMV window */
6020   x->mv_limits = tmp_mv_limits;
6021
6022   return bestsme;
6023 }
6024
6025 static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
6026                                         uint8_t *cur_frame_buf,
6027                                         uint8_t *ref_frame_buf, int stride,
6028                                         BLOCK_SIZE bsize, MV *mv) {
6029   MACROBLOCK *const x = &td->mb;
6030   MACROBLOCKD *const xd = &x->e_mbd;
6031   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6032   uint32_t bestsme = UINT_MAX;
6033   uint32_t distortion;
6034   uint32_t sse;
6035   int cost_list[5];
6036
6037   MV best_ref_mv1 = { 0, 0 };
6038
6039   // Setup frame pointers
6040   x->plane[0].src.buf = cur_frame_buf;
6041   x->plane[0].src.stride = stride;
6042   xd->plane[0].pre[0].buf = ref_frame_buf;
6043   xd->plane[0].pre[0].stride = stride;
6044
6045   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6046   // Ignore mv costing by sending NULL pointer instead of cost array
6047   bestsme = cpi->find_fractional_mv_step(
6048       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6049       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6050       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6051       USE_2_TAPS);
6052
6053   return bestsme;
6054 }
6055
6056 #else  // CONFIG_NON_GREEDY_MV
6057 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
6058                                               uint8_t *cur_frame_buf,
6059                                               uint8_t *ref_frame_buf,
6060                                               int stride, BLOCK_SIZE bsize,
6061                                               MV *mv) {
6062   MACROBLOCK *const x = &td->mb;
6063   MACROBLOCKD *const xd = &x->e_mbd;
6064   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6065   const SEARCH_METHODS search_method = NSTEP;
6066   int step_param;
6067   int sadpb = x->sadperbit16;
6068   uint32_t bestsme = UINT_MAX;
6069   uint32_t distortion;
6070   uint32_t sse;
6071   int cost_list[5];
6072   const MvLimits tmp_mv_limits = x->mv_limits;
6073
6074   MV best_ref_mv1 = { 0, 0 };
6075   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6076
6077   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6078   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6079
6080   // Setup frame pointers
6081   x->plane[0].src.buf = cur_frame_buf;
6082   x->plane[0].src.stride = stride;
6083   xd->plane[0].pre[0].buf = ref_frame_buf;
6084   xd->plane[0].pre[0].stride = stride;
6085
6086   step_param = mv_sf->reduce_first_step_size;
6087   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6088
6089   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6090
6091   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
6092                         search_method, sadpb, cond_cost_list(cpi, cost_list),
6093                         &best_ref_mv1, mv, 0, 0);
6094
6095   /* restore UMV window */
6096   x->mv_limits = tmp_mv_limits;
6097
6098   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6099   // Ignore mv costing by sending NULL pointer instead of cost array
6100   bestsme = cpi->find_fractional_mv_step(
6101       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6102       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6103       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6104       USE_2_TAPS);
6105
6106   return bestsme;
6107 }
6108 #endif
6109
6110 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
6111                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
6112   int width = 0, height = 0;
6113   int bw = 4 << b_width_log2_lookup[bsize];
6114   int bh = 4 << b_height_log2_lookup[bsize];
6115
6116   switch (block) {
6117     case 0:
6118       width = grid_pos_col + bw - ref_pos_col;
6119       height = grid_pos_row + bh - ref_pos_row;
6120       break;
6121     case 1:
6122       width = ref_pos_col + bw - grid_pos_col;
6123       height = grid_pos_row + bh - ref_pos_row;
6124       break;
6125     case 2:
6126       width = grid_pos_col + bw - ref_pos_col;
6127       height = ref_pos_row + bh - grid_pos_row;
6128       break;
6129     case 3:
6130       width = ref_pos_col + bw - grid_pos_col;
6131       height = ref_pos_row + bh - grid_pos_row;
6132       break;
6133     default: assert(0);
6134   }
6135
6136   return width * height;
6137 }
6138
6139 static int round_floor(int ref_pos, int bsize_pix) {
6140   int round;
6141   if (ref_pos < 0)
6142     round = -(1 + (-ref_pos - 1) / bsize_pix);
6143   else
6144     round = ref_pos / bsize_pix;
6145
6146   return round;
6147 }
6148
6149 static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
6150                             BLOCK_SIZE bsize, int stride) {
6151   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6152   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6153   const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
6154   int idx, idy;
6155
6156   for (idy = 0; idy < mi_height; ++idy) {
6157     for (idx = 0; idx < mi_width; ++idx) {
6158       TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
6159       const int64_t mc_flow = tpl_ptr->mc_flow;
6160       const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
6161       *tpl_ptr = *src_stats;
6162       tpl_ptr->mc_flow = mc_flow;
6163       tpl_ptr->mc_ref_cost = mc_ref_cost;
6164       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
6165     }
6166   }
6167 }
6168
6169 static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6170                                int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6171   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
6172   TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
6173   MV mv = tpl_stats->mv.as_mv;
6174   int mv_row = mv.row >> 3;
6175   int mv_col = mv.col >> 3;
6176
6177   int ref_pos_row = mi_row * MI_SIZE + mv_row;
6178   int ref_pos_col = mi_col * MI_SIZE + mv_col;
6179
6180   const int bw = 4 << b_width_log2_lookup[bsize];
6181   const int bh = 4 << b_height_log2_lookup[bsize];
6182   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6183   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6184   const int pix_num = bw * bh;
6185
6186   // top-left on grid block location in pixel
6187   int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
6188   int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
6189   int block;
6190
6191   for (block = 0; block < 4; ++block) {
6192     int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
6193     int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
6194
6195     if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
6196         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
6197       int overlap_area = get_overlap_area(
6198           grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
6199       int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
6200       int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
6201
6202       int64_t mc_flow = tpl_stats->mc_dep_cost -
6203                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
6204                             tpl_stats->intra_cost;
6205
6206       int idx, idy;
6207
6208       for (idy = 0; idy < mi_height; ++idy) {
6209         for (idx = 0; idx < mi_width; ++idx) {
6210           TplDepStats *des_stats =
6211               &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
6212                          (ref_mi_col + idx)];
6213
6214           des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
6215           des_stats->mc_ref_cost +=
6216               ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
6217               pix_num;
6218           assert(overlap_area >= 0);
6219         }
6220       }
6221     }
6222   }
6223 }
6224
6225 static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6226                              int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6227   int idx, idy;
6228   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6229   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6230
6231   for (idy = 0; idy < mi_height; ++idy) {
6232     for (idx = 0; idx < mi_width; ++idx) {
6233       TplDepStats *tpl_ptr =
6234           &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
6235       tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
6236                          BLOCK_8X8);
6237     }
6238   }
6239 }
6240
6241 static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
6242                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
6243                                TX_SIZE tx_size, int64_t *recon_error,
6244                                int64_t *sse) {
6245   MACROBLOCKD *const xd = &x->e_mbd;
6246   const struct macroblock_plane *const p = &x->plane[plane];
6247   const struct macroblockd_plane *const pd = &xd->plane[plane];
6248   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
6249   uint16_t eob;
6250   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
6251   const int shift = tx_size == TX_32X32 ? 0 : 2;
6252
6253 #if CONFIG_VP9_HIGHBITDEPTH
6254   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6255     vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6256                                  p->quant_fp, qcoeff, dqcoeff, pd->dequant,
6257                                  &eob, scan_order->scan, scan_order->iscan);
6258   } else {
6259     vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6260                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
6261                           scan_order->scan, scan_order->iscan);
6262   }
6263 #else
6264   vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
6265                         qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
6266                         scan_order->iscan);
6267 #endif  // CONFIG_VP9_HIGHBITDEPTH
6268
6269   *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
6270   *recon_error = VPXMAX(*recon_error, 1);
6271
6272   *sse = (*sse) >> shift;
6273   *sse = VPXMAX(*sse, 1);
6274 }
6275
6276 #if CONFIG_VP9_HIGHBITDEPTH
6277 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6278                          TX_SIZE tx_size) {
6279   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
6280   switch (tx_size) {
6281     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
6282     case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
6283     case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
6284     default: assert(0);
6285   }
6286 }
6287 #endif  // CONFIG_VP9_HIGHBITDEPTH
6288
6289 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6290                   TX_SIZE tx_size) {
6291   switch (tx_size) {
6292     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
6293     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
6294     case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
6295     default: assert(0);
6296   }
6297 }
6298
6299 static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
6300                           int mi_col) {
6301   x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6302   x->mv_limits.row_max =
6303       (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
6304   x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6305   x->mv_limits.col_max =
6306       ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
6307 }
6308
6309 static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
6310                             struct scale_factors *sf, GF_PICTURE *gf_picture,
6311                             int frame_idx, TplDepFrame *tpl_frame,
6312                             int16_t *src_diff, tran_low_t *coeff,
6313                             tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
6314                             int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
6315                             YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
6316                             int64_t *recon_error, int64_t *sse) {
6317   VP9_COMMON *cm = &cpi->common;
6318   ThreadData *td = &cpi->td;
6319
6320   const int bw = 4 << b_width_log2_lookup[bsize];
6321   const int bh = 4 << b_height_log2_lookup[bsize];
6322   const int pix_num = bw * bh;
6323   int best_rf_idx = -1;
6324   int_mv best_mv;
6325   int64_t best_inter_cost = INT64_MAX;
6326   int64_t inter_cost;
6327   int rf_idx;
6328   const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
6329
6330   int64_t best_intra_cost = INT64_MAX;
6331   int64_t intra_cost;
6332   PREDICTION_MODE mode;
6333   int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6334   MODE_INFO mi_above, mi_left;
6335   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6336   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6337   TplDepStats *tpl_stats =
6338       &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6339
6340   xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
6341   xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
6342   xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
6343   xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
6344   xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
6345   xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
6346
6347   // Intra prediction search
6348   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
6349     uint8_t *src, *dst;
6350     int src_stride, dst_stride;
6351
6352     src = xd->cur_buf->y_buffer + mb_y_offset;
6353     src_stride = xd->cur_buf->y_stride;
6354
6355     dst = &predictor[0];
6356     dst_stride = bw;
6357
6358     xd->mi[0]->sb_type = bsize;
6359     xd->mi[0]->ref_frame[0] = INTRA_FRAME;
6360
6361     vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
6362                             src_stride, dst, dst_stride, 0, 0, 0);
6363
6364 #if CONFIG_VP9_HIGHBITDEPTH
6365     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6366       vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6367                                 dst_stride, xd->bd);
6368       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6369       intra_cost = vpx_highbd_satd(coeff, pix_num);
6370     } else {
6371       vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6372                          dst_stride);
6373       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6374       intra_cost = vpx_satd(coeff, pix_num);
6375     }
6376 #else
6377     vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
6378     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6379     intra_cost = vpx_satd(coeff, pix_num);
6380 #endif  // CONFIG_VP9_HIGHBITDEPTH
6381
6382     if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
6383   }
6384
6385   // Motion compensated prediction
6386   best_mv.as_int = 0;
6387
6388   set_mv_limits(cm, x, mi_row, mi_col);
6389
6390   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6391     int_mv mv;
6392 #if CONFIG_NON_GREEDY_MV
6393     MotionField *motion_field;
6394 #endif
6395     if (ref_frame[rf_idx] == NULL) continue;
6396
6397 #if CONFIG_NON_GREEDY_MV
6398     (void)td;
6399     motion_field = vp9_motion_field_info_get_motion_field(
6400         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6401     mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6402 #else
6403     motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
6404                                   ref_frame[rf_idx]->y_buffer + mb_y_offset,
6405                                   xd->cur_buf->y_stride, bsize, &mv.as_mv);
6406 #endif
6407
6408 #if CONFIG_VP9_HIGHBITDEPTH
6409     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6410       vp9_highbd_build_inter_predictor(
6411           CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
6412           ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
6413           &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
6414           mi_row * MI_SIZE, xd->bd);
6415       vpx_highbd_subtract_block(
6416           bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
6417           xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
6418       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6419       inter_cost = vpx_highbd_satd(coeff, pix_num);
6420     } else {
6421       vp9_build_inter_predictor(
6422           ref_frame[rf_idx]->y_buffer + mb_y_offset,
6423           ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
6424           0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
6425       vpx_subtract_block(bh, bw, src_diff, bw,
6426                          xd->cur_buf->y_buffer + mb_y_offset,
6427                          xd->cur_buf->y_stride, &predictor[0], bw);
6428       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6429       inter_cost = vpx_satd(coeff, pix_num);
6430     }
6431 #else
6432     vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
6433                               ref_frame[rf_idx]->y_stride, &predictor[0], bw,
6434                               &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
6435                               mi_col * MI_SIZE, mi_row * MI_SIZE);
6436     vpx_subtract_block(bh, bw, src_diff, bw,
6437                        xd->cur_buf->y_buffer + mb_y_offset,
6438                        xd->cur_buf->y_stride, &predictor[0], bw);
6439     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6440     inter_cost = vpx_satd(coeff, pix_num);
6441 #endif
6442
6443     if (inter_cost < best_inter_cost) {
6444       best_rf_idx = rf_idx;
6445       best_inter_cost = inter_cost;
6446       best_mv.as_int = mv.as_int;
6447       get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
6448                          sse);
6449     }
6450   }
6451   best_intra_cost = VPXMAX(best_intra_cost, 1);
6452   best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
6453   tpl_stats->inter_cost = VPXMAX(
6454       1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6455   tpl_stats->intra_cost = VPXMAX(
6456       1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6457   tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
6458   tpl_stats->mv.as_int = best_mv.as_int;
6459 }
6460
6461 #if CONFIG_NON_GREEDY_MV
6462 static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
6463                                   int frame_idx, int rf_idx, int mi_row,
6464                                   int mi_col, struct buf_2d *src,
6465                                   struct buf_2d *pre) {
6466   const int mb_y_offset =
6467       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6468   YV12_BUFFER_CONFIG *ref_frame = NULL;
6469   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6470   if (ref_frame_idx != -1) {
6471     ref_frame = gf_picture[ref_frame_idx].frame;
6472     src->buf = xd->cur_buf->y_buffer + mb_y_offset;
6473     src->stride = xd->cur_buf->y_stride;
6474     pre->buf = ref_frame->y_buffer + mb_y_offset;
6475     pre->stride = ref_frame->y_stride;
6476     assert(src->stride == pre->stride);
6477     return 1;
6478   } else {
6479     printf("invalid ref_frame_idx");
6480     assert(ref_frame_idx != -1);
6481     return 0;
6482   }
6483 }
6484
6485 #define kMvPreCheckLines 5
6486 #define kMvPreCheckSize 15
6487
6488 #define MV_REF_POS_NUM 3
6489 POSITION mv_ref_pos[MV_REF_POS_NUM] = {
6490   { -1, 0 },
6491   { 0, -1 },
6492   { -1, -1 },
6493 };
6494
6495 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
6496                              int mi_col) {
6497   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
6498 }
6499
6500 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
6501                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
6502   int i;
6503   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6504   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6505   int_mv nearest_mv, near_mv, invalid_mv;
6506   nearest_mv.as_int = INVALID_MV;
6507   near_mv.as_int = INVALID_MV;
6508   invalid_mv.as_int = INVALID_MV;
6509   for (i = 0; i < MV_REF_POS_NUM; ++i) {
6510     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
6511     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
6512     assert(mv_ref_pos[i].row <= 0);
6513     assert(mv_ref_pos[i].col <= 0);
6514     if (nb_row >= 0 && nb_col >= 0) {
6515       if (nearest_mv.as_int == INVALID_MV) {
6516         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6517       } else {
6518         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6519         if (mv.as_int == nearest_mv.as_int) {
6520           continue;
6521         } else {
6522           near_mv = mv;
6523           break;
6524         }
6525       }
6526     }
6527   }
6528   if (nearest_mv.as_int == INVALID_MV) {
6529     nearest_mv.as_mv.row = 0;
6530     nearest_mv.as_mv.col = 0;
6531   }
6532   if (near_mv.as_int == INVALID_MV) {
6533     near_mv.as_mv.row = 0;
6534     near_mv.as_mv.col = 0;
6535   }
6536   if (mv_mode == NEAREST_MV_MODE) {
6537     return nearest_mv;
6538   }
6539   if (mv_mode == NEAR_MV_MODE) {
6540     return near_mv;
6541   }
6542   assert(0);
6543   return invalid_mv;
6544 }
6545
6546 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
6547                                   MotionField *motion_field,
6548                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
6549                                   int mi_row, int mi_col) {
6550   int_mv mv;
6551   switch (mv_mode) {
6552     case ZERO_MV_MODE:
6553       mv.as_mv.row = 0;
6554       mv.as_mv.col = 0;
6555       break;
6556     case NEW_MV_MODE:
6557       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6558       break;
6559     case NEAREST_MV_MODE:
6560       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6561       break;
6562     case NEAR_MV_MODE:
6563       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6564       break;
6565     default:
6566       mv.as_int = INVALID_MV;
6567       assert(0);
6568       break;
6569   }
6570   return mv;
6571 }
6572
6573 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
6574                           GF_PICTURE *gf_picture, MotionField *motion_field,
6575                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6576                           BLOCK_SIZE bsize, int mi_row, int mi_col,
6577                           int_mv *mv) {
6578   uint32_t sse;
6579   struct buf_2d src;
6580   struct buf_2d pre;
6581   MV full_mv;
6582   *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
6583                             mi_row, mi_col);
6584   full_mv = get_full_mv(&mv->as_mv);
6585   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
6586                              &src, &pre)) {
6587     // TODO(angiebird): Consider subpixel when computing the sse.
6588     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
6589                           pre.stride, &sse);
6590     return (double)(sse << VP9_DIST_SCALE_LOG2);
6591   } else {
6592     assert(0);
6593     return 0;
6594   }
6595 }
6596
6597 static int get_mv_mode_cost(int mv_mode) {
6598   // TODO(angiebird): The probabilities are roughly inferred from
6599   // default_inter_mode_probs. Check if there is a better way to set the
6600   // probabilities.
6601   const int zero_mv_prob = 16;
6602   const int new_mv_prob = 24 * 1;
6603   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
6604   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
6605   switch (mv_mode) {
6606     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
6607     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
6608     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6609     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6610     default: assert(0); return -1;
6611   }
6612 }
6613
6614 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
6615   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
6616                         log2(1 + abs(new_mv->col - ref_mv->col));
6617   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
6618   return mv_diff_cost;
6619 }
6620 static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
6621                           TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
6622                           int mi_col) {
6623   double mv_cost = get_mv_mode_cost(mv_mode);
6624   if (mv_mode == NEW_MV_MODE) {
6625     MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
6626                                     bsize, mi_row, mi_col)
6627                     .as_mv;
6628     MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
6629                                         tpl_frame, bsize, mi_row, mi_col)
6630                         .as_mv;
6631     MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
6632                                      bsize, mi_row, mi_col)
6633                      .as_mv;
6634     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
6635     double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
6636     mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
6637   }
6638   return mv_cost;
6639 }
6640
6641 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
6642                            GF_PICTURE *gf_picture, MotionField *motion_field,
6643                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6644                            BLOCK_SIZE bsize, int mi_row, int mi_col,
6645                            int_mv *mv) {
6646   MACROBLOCKD *xd = &x->e_mbd;
6647   double mv_dist =
6648       get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
6649                   tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
6650   double mv_cost =
6651       get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
6652   double mult = 180;
6653
6654   return mv_cost + mult * log2f(1 + mv_dist);
6655 }
6656
6657 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6658                                  GF_PICTURE *gf_picture,
6659                                  MotionField *motion_field, int frame_idx,
6660                                  TplDepFrame *tpl_frame, int rf_idx,
6661                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
6662                                  double *rd, int_mv *mv) {
6663   int best_mv_mode = ZERO_MV_MODE;
6664   int update = 0;
6665   int mv_mode;
6666   *rd = 0;
6667   for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
6668     double this_rd;
6669     int_mv this_mv;
6670     if (mv_mode == NEW_MV_MODE) {
6671       continue;
6672     }
6673     this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
6674                            tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
6675     if (update == 0) {
6676       *rd = this_rd;
6677       *mv = this_mv;
6678       best_mv_mode = mv_mode;
6679       update = 1;
6680     } else {
6681       if (this_rd < *rd) {
6682         *rd = this_rd;
6683         *mv = this_mv;
6684         best_mv_mode = mv_mode;
6685       }
6686     }
6687   }
6688   return best_mv_mode;
6689 }
6690
6691 static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6692                             GF_PICTURE *gf_picture, MotionField *motion_field,
6693                             int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6694                             BLOCK_SIZE bsize, int mi_row, int mi_col) {
6695   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6696   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6697   int tmp_mv_mode_arr[kMvPreCheckSize];
6698   int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
6699   double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
6700   int_mv *select_mv_arr = cpi->select_mv_arr;
6701   int_mv tmp_select_mv_arr[kMvPreCheckSize];
6702   int stride = tpl_frame->stride;
6703   double new_mv_rd = 0;
6704   double no_new_mv_rd = 0;
6705   double this_new_mv_rd = 0;
6706   double this_no_new_mv_rd = 0;
6707   int idx;
6708   int tmp_idx;
6709   assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);
6710
6711   // no new mv
6712   // diagonal scan order
6713   tmp_idx = 0;
6714   for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6715     int r;
6716     for (r = 0; r <= idx; ++r) {
6717       int c = idx - r;
6718       int nb_row = mi_row + r * mi_height;
6719       int nb_col = mi_col + c * mi_width;
6720       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6721         double this_rd;
6722         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6723         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6724             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6725             bsize, nb_row, nb_col, &this_rd, mv);
6726         if (r == 0 && c == 0) {
6727           this_no_new_mv_rd = this_rd;
6728         }
6729         no_new_mv_rd += this_rd;
6730         tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
6731         tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
6732         ++tmp_idx;
6733       }
6734     }
6735   }
6736
6737   // new mv
6738   mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
6739   this_new_mv_rd = eval_mv_mode(
6740       NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6741       rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
6742   new_mv_rd = this_new_mv_rd;
6743   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
6744   // beforehand.
6745   for (idx = 1; idx < kMvPreCheckLines; ++idx) {
6746     int r;
6747     for (r = 0; r <= idx; ++r) {
6748       int c = idx - r;
6749       int nb_row = mi_row + r * mi_height;
6750       int nb_col = mi_col + c * mi_width;
6751       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6752         double this_rd;
6753         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6754         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6755             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6756             bsize, nb_row, nb_col, &this_rd, mv);
6757         new_mv_rd += this_rd;
6758       }
6759     }
6760   }
6761
6762   // update best_mv_mode
6763   tmp_idx = 0;
6764   if (no_new_mv_rd < new_mv_rd) {
6765     for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6766       int r;
6767       for (r = 0; r <= idx; ++r) {
6768         int c = idx - r;
6769         int nb_row = mi_row + r * mi_height;
6770         int nb_col = mi_col + c * mi_width;
6771         if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6772           mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
6773           select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
6774           ++tmp_idx;
6775         }
6776       }
6777     }
6778     rd_diff_arr[mi_row * stride + mi_col] = 0;
6779   } else {
6780     rd_diff_arr[mi_row * stride + mi_col] =
6781         (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
6782   }
6783 }
6784
6785 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
6786                                 GF_PICTURE *gf_picture,
6787                                 MotionField *motion_field, int frame_idx,
6788                                 TplDepFrame *tpl_frame, int rf_idx,
6789                                 BLOCK_SIZE bsize) {
6790   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6791   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6792   const int unit_rows = tpl_frame->mi_rows / mi_height;
6793   const int unit_cols = tpl_frame->mi_cols / mi_width;
6794   const int max_diagonal_lines = unit_rows + unit_cols - 1;
6795   int idx;
6796   for (idx = 0; idx < max_diagonal_lines; ++idx) {
6797     int r;
6798     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
6799          ++r) {
6800       int c = idx - r;
6801       int mi_row = r * mi_height;
6802       int mi_col = c * mi_width;
6803       assert(c >= 0 && c < unit_cols);
6804       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
6805       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
6806       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6807                       rf_idx, bsize, mi_row, mi_col);
6808     }
6809   }
6810 }
6811
6812 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
6813                              MotionField *motion_field, int frame_idx,
6814                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
6815                              int mi_row, int mi_col) {
6816   VP9_COMMON *cm = &cpi->common;
6817   MACROBLOCK *x = &td->mb;
6818   MACROBLOCKD *xd = &x->e_mbd;
6819   const int mb_y_offset =
6820       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6821   assert(ref_frame != NULL);
6822   set_mv_limits(cm, x, mi_row, mi_col);
6823   {
6824     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6825     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
6826     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
6827     const int stride = xd->cur_buf->y_stride;
6828     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
6829                              ref_frame_buf, stride, bsize, mi_row, mi_col,
6830                              &mv.as_mv);
6831     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
6832                             bsize, &mv.as_mv);
6833     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
6834   }
6835 }
6836
6837 static void build_motion_field(
6838     VP9_COMP *cpi, int frame_idx,
6839     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
6840   VP9_COMMON *cm = &cpi->common;
6841   ThreadData *td = &cpi->td;
6842   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6843   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6844   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6845   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
6846   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
6847   int mi_row, mi_col;
6848   int rf_idx;
6849
6850   tpl_frame->lambda = (pw * ph) >> 2;
6851   assert(pw * ph == tpl_frame->lambda << 2);
6852
6853   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6854     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6855         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6856     if (ref_frame[rf_idx] == NULL) {
6857       continue;
6858     }
6859     vp9_motion_field_reset_mvs(motion_field);
6860     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6861       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6862         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
6863                          bsize, mi_row, mi_col);
6864       }
6865     }
6866   }
6867 }
6868 #endif  // CONFIG_NON_GREEDY_MV
6869
6870 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
6871                               int frame_idx, BLOCK_SIZE bsize) {
6872   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6873   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
6874   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
6875
6876   VP9_COMMON *cm = &cpi->common;
6877   struct scale_factors sf;
6878   int rdmult, idx;
6879   ThreadData *td = &cpi->td;
6880   MACROBLOCK *x = &td->mb;
6881   MACROBLOCKD *xd = &x->e_mbd;
6882   int mi_row, mi_col;
6883
6884 #if CONFIG_VP9_HIGHBITDEPTH
6885   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
6886   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
6887   uint8_t *predictor;
6888 #else
6889   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
6890 #endif
6891   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
6892   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
6893   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
6894   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
6895
6896   const TX_SIZE tx_size = max_txsize_lookup[bsize];
6897   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6898   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6899   int64_t recon_error, sse;
6900 #if CONFIG_NON_GREEDY_MV
6901   int square_block_idx;
6902   int rf_idx;
6903 #endif
6904
6905   // Setup scaling factor
6906 #if CONFIG_VP9_HIGHBITDEPTH
6907   vp9_setup_scale_factors_for_frame(
6908       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6909       this_frame->y_crop_width, this_frame->y_crop_height,
6910       cpi->common.use_highbitdepth);
6911
6912   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6913     predictor = CONVERT_TO_BYTEPTR(predictor16);
6914   else
6915     predictor = predictor8;
6916 #else
6917   vp9_setup_scale_factors_for_frame(
6918       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6919       this_frame->y_crop_width, this_frame->y_crop_height);
6920 #endif  // CONFIG_VP9_HIGHBITDEPTH
6921
6922   // Prepare reference frame pointers. If any reference frame slot is
6923   // unavailable, the pointer will be set to Null.
6924   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
6925     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
6926     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
6927   }
6928
6929   xd->mi = cm->mi_grid_visible;
6930   xd->mi[0] = cm->mi;
6931   xd->cur_buf = this_frame;
6932
6933   // Get rd multiplier set up.
6934   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
6935   set_error_per_bit(&cpi->td.mb, rdmult);
6936   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
6937
6938   tpl_frame->is_valid = 1;
6939
6940   cm->base_qindex = tpl_frame->base_qindex;
6941   vp9_frame_init_quantizer(cpi);
6942
6943 #if CONFIG_NON_GREEDY_MV
6944   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
6945        ++square_block_idx) {
6946     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
6947     build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
6948   }
6949   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6950     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6951     if (ref_frame_idx != -1) {
6952       MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6953           &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6954       predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
6955                           tpl_frame, rf_idx, bsize);
6956     }
6957   }
6958 #endif
6959
6960   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6961     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6962       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
6963                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
6964                       tx_size, ref_frame, predictor, &recon_error, &sse);
6965       // Motion flow dependency dispenser.
6966       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
6967                       tpl_frame->stride);
6968
6969       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
6970                        bsize);
6971     }
6972   }
6973 }
6974
6975 #if CONFIG_NON_GREEDY_MV
6976 #define DUMP_TPL_STATS 0
6977 #if DUMP_TPL_STATS
6978 static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
6979   int i, j;
6980   printf("%d %d\n", h, w);
6981   for (i = 0; i < h; ++i) {
6982     for (j = 0; j < w; ++j) {
6983       printf("%d ", buf[(row + i) * stride + col + j]);
6984     }
6985   }
6986   printf("\n");
6987 }
6988
6989 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
6990   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
6991            frame_buf->y_width);
6992   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
6993            frame_buf->uv_height, frame_buf->uv_width);
6994   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
6995            frame_buf->uv_height, frame_buf->uv_width);
6996 }
6997
6998 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
6999                            const GF_GROUP *gf_group,
7000                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
7001   int frame_idx;
7002   const VP9_COMMON *cm = &cpi->common;
7003   int rf_idx;
7004   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
7005     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7006       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7007       int mi_row, mi_col;
7008       int ref_frame_idx;
7009       const int mi_height = num_8x8_blocks_high_lookup[bsize];
7010       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7011       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
7012       if (ref_frame_idx != -1) {
7013         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
7014         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
7015         const int ref_gf_frame_offset =
7016             gf_group->frame_gop_index[ref_frame_idx];
7017         printf("=\n");
7018         printf(
7019             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
7020             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
7021             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
7022             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
7023         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7024           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7025             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7026               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
7027                                                        frame_idx, rf_idx, bsize,
7028                                                        mi_row, mi_col);
7029               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
7030                      mv.as_mv.col);
7031             }
7032           }
7033         }
7034         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7035           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7036             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7037               const TplDepStats *tpl_ptr =
7038                   &tpl_frame
7039                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
7040               printf("%f ", tpl_ptr->feature_score);
7041             }
7042           }
7043         }
7044         printf("\n");
7045
7046         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7047           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7048             const int mv_mode =
7049                 tpl_frame
7050                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
7051             printf("%d ", mv_mode);
7052           }
7053         }
7054         printf("\n");
7055
7056         dump_frame_buf(gf_picture[frame_idx].frame);
7057         dump_frame_buf(ref_frame_buf);
7058       }
7059     }
7060   }
7061 }
7062 #endif  // DUMP_TPL_STATS
7063 #endif  // CONFIG_NON_GREEDY_MV
7064
7065 static void init_tpl_buffer(VP9_COMP *cpi) {
7066   VP9_COMMON *cm = &cpi->common;
7067   int frame;
7068
7069   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7070   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7071 #if CONFIG_NON_GREEDY_MV
7072   int rf_idx;
7073
7074   vpx_free(cpi->select_mv_arr);
7075   CHECK_MEM_ERROR(
7076       cm, cpi->select_mv_arr,
7077       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
7078 #endif
7079
7080   // TODO(jingning): Reduce the actual memory use for tpl model build up.
7081   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7082     if (cpi->tpl_stats[frame].width >= mi_cols &&
7083         cpi->tpl_stats[frame].height >= mi_rows &&
7084         cpi->tpl_stats[frame].tpl_stats_ptr)
7085       continue;
7086
7087 #if CONFIG_NON_GREEDY_MV
7088     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7089       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7090       CHECK_MEM_ERROR(
7091           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
7092           vpx_calloc(mi_rows * mi_cols * 4,
7093                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
7094       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7095       CHECK_MEM_ERROR(
7096           cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
7097           vpx_calloc(mi_rows * mi_cols * 4,
7098                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
7099     }
7100 #endif
7101     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7102     CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
7103                     vpx_calloc(mi_rows * mi_cols,
7104                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
7105     cpi->tpl_stats[frame].is_valid = 0;
7106     cpi->tpl_stats[frame].width = mi_cols;
7107     cpi->tpl_stats[frame].height = mi_rows;
7108     cpi->tpl_stats[frame].stride = mi_cols;
7109     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
7110     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
7111   }
7112
7113   for (frame = 0; frame < REF_FRAMES; ++frame) {
7114     cpi->enc_frame_buf[frame].mem_valid = 0;
7115     cpi->enc_frame_buf[frame].released = 1;
7116   }
7117 }
7118
7119 static void free_tpl_buffer(VP9_COMP *cpi) {
7120   int frame;
7121 #if CONFIG_NON_GREEDY_MV
7122   vp9_free_motion_field_info(&cpi->motion_field_info);
7123   vpx_free(cpi->select_mv_arr);
7124 #endif
7125   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7126 #if CONFIG_NON_GREEDY_MV
7127     int rf_idx;
7128     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7129       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7130       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7131     }
7132 #endif
7133     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7134     cpi->tpl_stats[frame].is_valid = 0;
7135   }
7136 }
7137
7138 static void setup_tpl_stats(VP9_COMP *cpi) {
7139   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
7140   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7141   int tpl_group_frames = 0;
7142   int frame_idx;
7143   cpi->tpl_bsize = BLOCK_32X32;
7144
7145   init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
7146
7147   init_tpl_stats(cpi);
7148
7149   // Backward propagation from tpl_group_frames to 1.
7150   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
7151     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
7152     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
7153   }
7154 #if CONFIG_NON_GREEDY_MV
7155   cpi->tpl_ready = 1;
7156 #if DUMP_TPL_STATS
7157   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
7158 #endif  // DUMP_TPL_STATS
7159 #endif  // CONFIG_NON_GREEDY_MV
7160 }
7161
7162 #if !CONFIG_REALTIME_ONLY
7163 #if CONFIG_RATE_CTRL
7164 static void copy_frame_counts(const FRAME_COUNTS *input_counts,
7165                               FRAME_COUNTS *output_counts) {
7166   int i, j, k, l, m, n;
7167   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
7168     for (j = 0; j < INTRA_MODES; ++j) {
7169       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
7170     }
7171   }
7172   for (i = 0; i < INTRA_MODES; ++i) {
7173     for (j = 0; j < INTRA_MODES; ++j) {
7174       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
7175     }
7176   }
7177   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
7178     for (j = 0; j < PARTITION_TYPES; ++j) {
7179       output_counts->partition[i][j] = input_counts->partition[i][j];
7180     }
7181   }
7182   for (i = 0; i < TX_SIZES; ++i) {
7183     for (j = 0; j < PLANE_TYPES; ++j) {
7184       for (k = 0; k < REF_TYPES; ++k) {
7185         for (l = 0; l < COEF_BANDS; ++l) {
7186           for (m = 0; m < COEFF_CONTEXTS; ++m) {
7187             output_counts->eob_branch[i][j][k][l][m] =
7188                 input_counts->eob_branch[i][j][k][l][m];
7189             for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) {
7190               output_counts->coef[i][j][k][l][m][n] =
7191                   input_counts->coef[i][j][k][l][m][n];
7192             }
7193           }
7194         }
7195       }
7196     }
7197   }
7198   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
7199     for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
7200       output_counts->switchable_interp[i][j] =
7201           input_counts->switchable_interp[i][j];
7202     }
7203   }
7204   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
7205     for (j = 0; j < INTER_MODES; ++j) {
7206       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
7207     }
7208   }
7209   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
7210     for (j = 0; j < 2; ++j) {
7211       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
7212     }
7213   }
7214   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
7215     for (j = 0; j < 2; ++j) {
7216       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
7217     }
7218   }
7219   for (i = 0; i < REF_CONTEXTS; ++i) {
7220     for (j = 0; j < 2; ++j) {
7221       for (k = 0; k < 2; ++k) {
7222         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
7223       }
7224     }
7225   }
7226   for (i = 0; i < REF_CONTEXTS; ++i) {
7227     for (j = 0; j < 2; ++j) {
7228       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
7229     }
7230   }
7231   for (i = 0; i < SKIP_CONTEXTS; ++i) {
7232     for (j = 0; j < 2; ++j) {
7233       output_counts->skip[i][j] = input_counts->skip[i][j];
7234     }
7235   }
7236   for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
7237     for (j = 0; j < TX_SIZES; j++) {
7238       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
7239     }
7240     for (j = 0; j < TX_SIZES - 1; j++) {
7241       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
7242     }
7243     for (j = 0; j < TX_SIZES - 2; j++) {
7244       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
7245     }
7246   }
7247   for (i = 0; i < TX_SIZES; i++) {
7248     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
7249   }
7250   for (i = 0; i < MV_JOINTS; i++) {
7251     output_counts->mv.joints[i] = input_counts->mv.joints[i];
7252   }
7253   for (k = 0; k < 2; k++) {
7254     nmv_component_counts *const comps = &output_counts->mv.comps[k];
7255     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
7256     for (i = 0; i < 2; i++) {
7257       comps->sign[i] = comps_t->sign[i];
7258       comps->class0_hp[i] = comps_t->class0_hp[i];
7259       comps->hp[i] = comps_t->hp[i];
7260     }
7261     for (i = 0; i < MV_CLASSES; i++) {
7262       comps->classes[i] = comps_t->classes[i];
7263     }
7264     for (i = 0; i < CLASS0_SIZE; i++) {
7265       comps->class0[i] = comps_t->class0[i];
7266       for (j = 0; j < MV_FP_SIZE; j++) {
7267         comps->class0_fp[i][j] = comps_t->class0_fp[i][j];
7268       }
7269     }
7270     for (i = 0; i < MV_OFFSET_BITS; i++) {
7271       for (j = 0; j < 2; j++) {
7272         comps->bits[i][j] = comps_t->bits[i][j];
7273       }
7274     }
7275     for (i = 0; i < MV_FP_SIZE; i++) {
7276       comps->fp[i] = comps_t->fp[i];
7277     }
7278   }
7279 }
7280
7281 static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
7282                                         IMAGE_BUFFER *image_buffer) {
7283   const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,
7284                                    yv12_buffer->v_buffer };
7285   const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,
7286                                  yv12_buffer->uv_stride };
7287   const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,
7288                         yv12_buffer->uv_crop_width };
7289   const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,
7290                         yv12_buffer->uv_crop_height };
7291   int plane;
7292   for (plane = 0; plane < 3; ++plane) {
7293     const int src_stride = src_stride_ls[plane];
7294     const int w = w_ls[plane];
7295     const int h = h_ls[plane];
7296     const uint8_t *src_buf = src_buf_ls[plane];
7297     uint8_t *dst_buf = image_buffer->plane_buffer[plane];
7298     int r;
7299     assert(image_buffer->plane_width[plane] == w);
7300     assert(image_buffer->plane_height[plane] == h);
7301     for (r = 0; r < h; ++r) {
7302       memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);
7303       src_buf += src_stride;
7304       dst_buf += w;
7305     }
7306   }
7307 }
7308 #endif  // CONFIG_RATE_CTRL
7309 static void update_encode_frame_result(
7310     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
7311     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
7312     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
7313     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
7314 #if CONFIG_RATE_CTRL
7315     const PARTITION_INFO *partition_info,
7316     const MOTION_VECTOR_INFO *motion_vector_info,
7317 #endif  // CONFIG_RATE_CTRL
7318     ENCODE_FRAME_RESULT *encode_frame_result) {
7319 #if CONFIG_RATE_CTRL
7320   PSNR_STATS psnr;
7321 #if CONFIG_VP9_HIGHBITDEPTH
7322   vpx_calc_highbd_psnr(source_frame, coded_frame_buf->buf, &psnr, bit_depth,
7323                        input_bit_depth);
7324 #else   // CONFIG_VP9_HIGHBITDEPTH
7325   (void)bit_depth;
7326   (void)input_bit_depth;
7327   vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);
7328 #endif  // CONFIG_VP9_HIGHBITDEPTH
7329   encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
7330
7331   if (update_type != KF_UPDATE) {
7332     const VP9_REFFRAME inter_ref_flags[MAX_INTER_REF_FRAMES] = { VP9_LAST_FLAG,
7333                                                                  VP9_GOLD_FLAG,
7334                                                                  VP9_ALT_FLAG };
7335     int i;
7336     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7337       assert(ref_frame_bufs[i] != NULL);
7338       encode_frame_result->ref_frame_coding_indexes[i] =
7339           ref_frame_bufs[i]->frame_coding_index;
7340       encode_frame_result->ref_frame_valid_list[i] =
7341           !(ref_frame_flags & inter_ref_flags[i]);
7342     }
7343   } else {
7344     // No reference frame is available when this is a key frame.
7345     int i;
7346     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7347       encode_frame_result->ref_frame_coding_indexes[i] = -1;
7348       encode_frame_result->ref_frame_valid_list[i] = 0;
7349     }
7350   }
7351   encode_frame_result->psnr = psnr.psnr[0];
7352   encode_frame_result->sse = psnr.sse[0];
7353   copy_frame_counts(counts, &encode_frame_result->frame_counts);
7354   encode_frame_result->partition_info = partition_info;
7355   encode_frame_result->motion_vector_info = motion_vector_info;
7356   if (encode_frame_result->coded_frame.allocated) {
7357     yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
7358                                 &encode_frame_result->coded_frame);
7359   }
7360 #else   // CONFIG_RATE_CTRL
7361   (void)ref_frame_flags;
7362   (void)bit_depth;
7363   (void)input_bit_depth;
7364   (void)source_frame;
7365   (void)coded_frame_buf;
7366   (void)ref_frame_bufs;
7367   (void)counts;
7368 #endif  // CONFIG_RATE_CTRL
7369   encode_frame_result->show_idx = coded_frame_buf->frame_index;
7370   encode_frame_result->update_type = update_type;
7371   encode_frame_result->quantize_index = quantize_index;
7372 }
7373 #endif  // !CONFIG_REALTIME_ONLY
7374
7375 void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
7376   encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
7377 #if CONFIG_RATE_CTRL
7378   encode_frame_result->frame_coding_index = -1;
7379   vp9_zero(encode_frame_result->coded_frame);
7380   encode_frame_result->coded_frame.allocated = 0;
7381 #endif  // CONFIG_RATE_CTRL
7382 }
7383
7384 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
7385                             size_t *size, uint8_t *dest, int64_t *time_stamp,
7386                             int64_t *time_end, int flush,
7387                             ENCODE_FRAME_RESULT *encode_frame_result) {
7388   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
7389   VP9_COMMON *const cm = &cpi->common;
7390   BufferPool *const pool = cm->buffer_pool;
7391   RATE_CONTROL *const rc = &cpi->rc;
7392   struct vpx_usec_timer cmptimer;
7393   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
7394   struct lookahead_entry *last_source = NULL;
7395   struct lookahead_entry *source = NULL;
7396   int arf_src_index;
7397   const int gf_group_index = cpi->twopass.gf_group.index;
7398   int i;
7399
7400   if (is_one_pass_cbr_svc(cpi)) {
7401     vp9_one_pass_cbr_svc_start_layer(cpi);
7402   }
7403
7404   vpx_usec_timer_start(&cmptimer);
7405
7406   vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
7407
7408   // Is multi-arf enabled.
7409   // Note that at the moment multi_arf is only configured for 2 pass VBR and
7410   // will not work properly with svc.
7411   // Enable the Jingning's new "multi_layer_arf" code if "enable_auto_arf"
7412   // is greater than or equal to 2.
7413   if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
7414     cpi->multi_layer_arf = 1;
7415   else
7416     cpi->multi_layer_arf = 0;
7417
7418   // Normal defaults
7419   cm->reset_frame_context = 0;
7420   cm->refresh_frame_context = 1;
7421   if (!is_one_pass_cbr_svc(cpi)) {
7422     cpi->refresh_last_frame = 1;
7423     cpi->refresh_golden_frame = 0;
7424     cpi->refresh_alt_ref_frame = 0;
7425   }
7426
7427   // Should we encode an arf frame.
7428   arf_src_index = get_arf_src_index(cpi);
7429
7430   if (arf_src_index) {
7431     for (i = 0; i <= arf_src_index; ++i) {
7432       struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
7433       // Avoid creating an alt-ref if there's a forced keyframe pending.
7434       if (e == NULL) {
7435         break;
7436       } else if (e->flags == VPX_EFLAG_FORCE_KF) {
7437         arf_src_index = 0;
7438         flush = 1;
7439         break;
7440       }
7441     }
7442   }
7443
7444   // Clear arf index stack before group of pictures processing starts.
7445   if (gf_group_index == 1) {
7446     stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
7447     cpi->twopass.gf_group.stack_size = 0;
7448   }
7449
7450   if (arf_src_index) {
7451     assert(arf_src_index <= rc->frames_to_key);
7452     if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
7453       cpi->alt_ref_source = source;
7454
7455 #if !CONFIG_REALTIME_ONLY
7456       if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
7457           (oxcf->arnr_strength > 0)) {
7458         int bitrate = cpi->rc.avg_frame_bandwidth / 40;
7459         int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;
7460
7461         int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1);
7462         not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME;
7463
7464         // Produce the filtered ARF frame.
7465         vp9_temporal_filter(cpi, arf_src_index);
7466         vpx_extend_frame_borders(&cpi->alt_ref_buffer);
7467
7468         // for small bitrates segmentation overhead usually
7469         // eats all bitrate gain from enabling delta quantizers
7470         if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
7471           vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);
7472
7473         force_src_buffer = &cpi->alt_ref_buffer;
7474       }
7475 #endif
7476       cm->show_frame = 0;
7477       cm->intra_only = 0;
7478       cpi->refresh_alt_ref_frame = 1;
7479       cpi->refresh_golden_frame = 0;
7480       cpi->refresh_last_frame = 0;
7481       rc->is_src_frame_alt_ref = 0;
7482       rc->source_alt_ref_pending = 0;
7483     } else {
7484       rc->source_alt_ref_pending = 0;
7485     }
7486   }
7487
7488   if (!source) {
7489     // Get last frame source.
7490     if (cm->current_video_frame > 0) {
7491       if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
7492         return -1;
7493     }
7494
7495     // Read in the source frame.
7496     if (cpi->use_svc || cpi->svc.set_intra_only_frame)
7497       source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
7498     else
7499       source = vp9_lookahead_pop(cpi->lookahead, flush);
7500
7501     if (source != NULL) {
7502       cm->show_frame = 1;
7503       cm->intra_only = 0;
7504       // If the flags indicate intra frame, but if the current picture is for
7505       // spatial layer above first_spatial_layer_to_encode, it should not be an
7506       // intra picture.
7507       if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
7508           cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
7509         source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
7510       }
7511
7512       // Check to see if the frame should be encoded as an arf overlay.
7513       check_src_altref(cpi, source);
7514     }
7515   }
7516
7517   if (source) {
7518     cpi->un_scaled_source = cpi->Source =
7519         force_src_buffer ? force_src_buffer : &source->img;
7520
7521 #ifdef ENABLE_KF_DENOISE
7522     // Copy of raw source for metrics calculation.
7523     if (is_psnr_calc_enabled(cpi))
7524       vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
7525 #endif
7526
7527     cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
7528
7529     *time_stamp = source->ts_start;
7530     *time_end = source->ts_end;
7531     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
7532   } else {
7533     *size = 0;
7534     return -1;
7535   }
7536
7537   if (source->ts_start < cpi->first_time_stamp_ever) {
7538     cpi->first_time_stamp_ever = source->ts_start;
7539     cpi->last_end_time_stamp_seen = source->ts_start;
7540   }
7541
7542   // Clear down mmx registers
7543   vpx_clear_system_state();
7544
7545   // adjust frame rates based on timestamps given
7546   if (cm->show_frame) {
7547     if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
7548         cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
7549       vp9_svc_adjust_frame_rate(cpi);
7550     else
7551       adjust_frame_rate(cpi, source);
7552   }
7553
7554   if (is_one_pass_cbr_svc(cpi)) {
7555     vp9_update_temporal_layer_framerate(cpi);
7556     vp9_restore_layer_context(cpi);
7557   }
7558
7559   // Find a free buffer for the new frame, releasing the reference previously
7560   // held.
7561   if (cm->new_fb_idx != INVALID_IDX) {
7562     --pool->frame_bufs[cm->new_fb_idx].ref_count;
7563   }
7564   cm->new_fb_idx = get_free_fb(cm);
7565
7566   if (cm->new_fb_idx == INVALID_IDX) return -1;
7567
7568   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
7569
7570   // Start with a 0 size frame.
7571   *size = 0;
7572
7573   cpi->frame_flags = *frame_flags;
7574
7575 #if !CONFIG_REALTIME_ONLY
7576   if ((oxcf->pass == 2) && !cpi->use_svc) {
7577     vp9_rc_get_second_pass_params(cpi);
7578   } else if (oxcf->pass == 1) {
7579     set_frame_size(cpi);
7580   }
7581 #endif  // !CONFIG_REALTIME_ONLY
7582
7583   if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
7584       cpi->level_constraint.fail_flag == 0)
7585     level_rc_framerate(cpi, arf_src_index);
7586
7587   if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
7588     for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
7589   }
7590
7591   if (cpi->kmeans_data_arr_alloc == 0) {
7592     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7593     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7594 #if CONFIG_MULTITHREAD
7595     pthread_mutex_init(&cpi->kmeans_mutex, NULL);
7596 #endif
7597     CHECK_MEM_ERROR(
7598         cm, cpi->kmeans_data_arr,
7599         vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
7600     cpi->kmeans_data_stride = mi_cols;
7601     cpi->kmeans_data_arr_alloc = 1;
7602   }
7603
7604 #if CONFIG_NON_GREEDY_MV
7605   {
7606     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7607     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7608     Status status = vp9_alloc_motion_field_info(
7609         &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
7610     if (status == STATUS_FAILED) {
7611       vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
7612                          "vp9_alloc_motion_field_info failed");
7613     }
7614   }
7615 #endif  // CONFIG_NON_GREEDY_MV
7616
7617   if (gf_group_index == 1 &&
7618       cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
7619       cpi->sf.enable_tpl_model) {
7620     init_tpl_buffer(cpi);
7621     vp9_estimate_qp_gop(cpi);
7622     setup_tpl_stats(cpi);
7623   }
7624
7625 #if CONFIG_BITSTREAM_DEBUG
7626   assert(cpi->oxcf.max_threads == 0 &&
7627          "bitstream debug tool does not support multithreading");
7628   bitstream_queue_record_write();
7629 #endif
7630 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
7631   bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
7632 #endif
7633
7634   cpi->td.mb.fp_src_pred = 0;
7635 #if CONFIG_REALTIME_ONLY
7636   (void)encode_frame_result;
7637   if (cpi->use_svc) {
7638     SvcEncode(cpi, size, dest, frame_flags);
7639   } else {
7640     // One pass encode
7641     Pass0Encode(cpi, size, dest, frame_flags);
7642   }
7643 #else  // !CONFIG_REALTIME_ONLY
7644   if (oxcf->pass == 1 && !cpi->use_svc) {
7645     const int lossless = is_lossless_requested(oxcf);
7646 #if CONFIG_VP9_HIGHBITDEPTH
7647     if (cpi->oxcf.use_highbitdepth)
7648       cpi->td.mb.fwd_txfm4x4 =
7649           lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
7650     else
7651       cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7652     cpi->td.mb.highbd_inv_txfm_add =
7653         lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
7654 #else
7655     cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7656 #endif  // CONFIG_VP9_HIGHBITDEPTH
7657     cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
7658     vp9_first_pass(cpi, source);
7659   } else if (oxcf->pass == 2 && !cpi->use_svc) {
7660     Pass2Encode(cpi, size, dest, frame_flags, encode_frame_result);
7661     vp9_twopass_postencode_update(cpi);
7662   } else if (cpi->use_svc) {
7663     SvcEncode(cpi, size, dest, frame_flags);
7664   } else {
7665     // One pass encode
7666     Pass0Encode(cpi, size, dest, frame_flags);
7667   }
7668 #endif  // CONFIG_REALTIME_ONLY
7669
7670   if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
7671
7672   if (cm->refresh_frame_context)
7673     cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
7674
7675   // No frame encoded, or frame was dropped, release scaled references.
7676   if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
7677     release_scaled_references(cpi);
7678   }
7679
7680   if (*size > 0) {
7681     cpi->droppable = !frame_is_reference(cpi);
7682   }
7683
7684   // Save layer specific state.
7685   if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 ||
7686                                     cpi->svc.number_spatial_layers > 1) &&
7687                                    oxcf->pass == 2)) {
7688     vp9_save_layer_context(cpi);
7689   }
7690
7691   vpx_usec_timer_mark(&cmptimer);
7692   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
7693
7694   if (cpi->keep_level_stats && oxcf->pass != 1)
7695     update_level_info(cpi, size, arf_src_index);
7696
7697 #if CONFIG_INTERNAL_STATS
7698
7699   if (oxcf->pass != 1) {
7700     double samples = 0.0;
7701     cpi->bytes += (int)(*size);
7702
7703     if (cm->show_frame) {
7704       uint32_t bit_depth = 8;
7705       uint32_t in_bit_depth = 8;
7706       cpi->count++;
7707 #if CONFIG_VP9_HIGHBITDEPTH
7708       if (cm->use_highbitdepth) {
7709         in_bit_depth = cpi->oxcf.input_bit_depth;
7710         bit_depth = cm->bit_depth;
7711       }
7712 #endif
7713
7714       if (cpi->b_calculate_psnr) {
7715         YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
7716         YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
7717         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
7718         PSNR_STATS psnr;
7719 #if CONFIG_VP9_HIGHBITDEPTH
7720         vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
7721                              in_bit_depth);
7722 #else
7723         vpx_calc_psnr(orig, recon, &psnr);
7724 #endif  // CONFIG_VP9_HIGHBITDEPTH
7725
7726         adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
7727                           psnr.psnr[0], &cpi->psnr);
7728         cpi->total_sq_error += psnr.sse[0];
7729         cpi->total_samples += psnr.samples[0];
7730         samples = psnr.samples[0];
7731
7732         {
7733           PSNR_STATS psnr2;
7734           double frame_ssim2 = 0, weight = 0;
7735 #if CONFIG_VP9_POSTPROC
7736           if (vpx_alloc_frame_buffer(
7737                   pp, recon->y_crop_width, recon->y_crop_height,
7738                   cm->subsampling_x, cm->subsampling_y,
7739 #if CONFIG_VP9_HIGHBITDEPTH
7740                   cm->use_highbitdepth,
7741 #endif
7742                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) {
7743             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
7744                                "Failed to allocate post processing buffer");
7745           }
7746           {
7747             vp9_ppflags_t ppflags;
7748             ppflags.post_proc_flag = VP9D_DEBLOCK;
7749             ppflags.deblocking_level = 0;  // not used in vp9_post_proc_frame()
7750             ppflags.noise_level = 0;       // not used in vp9_post_proc_frame()
7751             vp9_post_proc_frame(cm, pp, &ppflags,
7752                                 cpi->un_scaled_source->y_width);
7753           }
7754 #endif
7755           vpx_clear_system_state();
7756
7757 #if CONFIG_VP9_HIGHBITDEPTH
7758           vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
7759                                cpi->oxcf.input_bit_depth);
7760 #else
7761           vpx_calc_psnr(orig, pp, &psnr2);
7762 #endif  // CONFIG_VP9_HIGHBITDEPTH
7763
7764           cpi->totalp_sq_error += psnr2.sse[0];
7765           cpi->totalp_samples += psnr2.samples[0];
7766           adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
7767                             psnr2.psnr[0], &cpi->psnrp);
7768
7769 #if CONFIG_VP9_HIGHBITDEPTH
7770           if (cm->use_highbitdepth) {
7771             frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth,
7772                                                in_bit_depth);
7773           } else {
7774             frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7775           }
7776 #else
7777           frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7778 #endif  // CONFIG_VP9_HIGHBITDEPTH
7779
7780           cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
7781           cpi->summed_quality += frame_ssim2 * weight;
7782           cpi->summed_weights += weight;
7783
7784 #if CONFIG_VP9_HIGHBITDEPTH
7785           if (cm->use_highbitdepth) {
7786             frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth,
7787                                                in_bit_depth);
7788           } else {
7789             frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7790           }
7791 #else
7792           frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7793 #endif  // CONFIG_VP9_HIGHBITDEPTH
7794
7795           cpi->summedp_quality += frame_ssim2 * weight;
7796           cpi->summedp_weights += weight;
7797 #if 0
7798           if (cm->show_frame) {
7799             FILE *f = fopen("q_used.stt", "a");
7800             fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
7801                     cpi->common.current_video_frame, psnr2.psnr[1],
7802                     psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
7803             fclose(f);
7804           }
7805 #endif
7806         }
7807       }
7808       if (cpi->b_calculate_blockiness) {
7809 #if CONFIG_VP9_HIGHBITDEPTH
7810         if (!cm->use_highbitdepth)
7811 #endif
7812         {
7813           double frame_blockiness = vp9_get_blockiness(
7814               cpi->Source->y_buffer, cpi->Source->y_stride,
7815               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
7816               cpi->Source->y_width, cpi->Source->y_height);
7817           cpi->worst_blockiness =
7818               VPXMAX(cpi->worst_blockiness, frame_blockiness);
7819           cpi->total_blockiness += frame_blockiness;
7820         }
7821       }
7822
7823       if (cpi->b_calculate_consistency) {
7824 #if CONFIG_VP9_HIGHBITDEPTH
7825         if (!cm->use_highbitdepth)
7826 #endif
7827         {
7828           double this_inconsistency = vpx_get_ssim_metrics(
7829               cpi->Source->y_buffer, cpi->Source->y_stride,
7830               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
7831               cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
7832               &cpi->metrics, 1);
7833
7834           const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
7835           double consistency =
7836               vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency);
7837           if (consistency > 0.0)
7838             cpi->worst_consistency =
7839                 VPXMIN(cpi->worst_consistency, consistency);
7840           cpi->total_inconsistency += this_inconsistency;
7841         }
7842       }
7843
7844       {
7845         double y, u, v, frame_all;
7846         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
7847                                       &v, bit_depth, in_bit_depth);
7848         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
7849       }
7850       {
7851         double y, u, v, frame_all;
7852         frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
7853                                 bit_depth, in_bit_depth);
7854         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
7855       }
7856     }
7857   }
7858
7859 #endif
7860
7861   if (is_one_pass_cbr_svc(cpi)) {
7862     if (cm->show_frame) {
7863       ++cpi->svc.spatial_layer_to_encode;
7864       if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
7865         cpi->svc.spatial_layer_to_encode = 0;
7866     }
7867   }
7868
7869   vpx_clear_system_state();
7870   return 0;
7871 }
7872
7873 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
7874                               vp9_ppflags_t *flags) {
7875   VP9_COMMON *cm = &cpi->common;
7876 #if !CONFIG_VP9_POSTPROC
7877   (void)flags;
7878 #endif
7879
7880   if (!cm->show_frame) {
7881     return -1;
7882   } else {
7883     int ret;
7884 #if CONFIG_VP9_POSTPROC
7885     ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
7886 #else
7887     if (cm->frame_to_show) {
7888       *dest = *cm->frame_to_show;
7889       dest->y_width = cm->width;
7890       dest->y_height = cm->height;
7891       dest->uv_width = cm->width >> cm->subsampling_x;
7892       dest->uv_height = cm->height >> cm->subsampling_y;
7893       ret = 0;
7894     } else {
7895       ret = -1;
7896     }
7897 #endif  // !CONFIG_VP9_POSTPROC
7898     vpx_clear_system_state();
7899     return ret;
7900   }
7901 }
7902
7903 int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
7904                           VPX_SCALING vert_mode) {
7905   VP9_COMMON *cm = &cpi->common;
7906   int hr = 0, hs = 0, vr = 0, vs = 0;
7907
7908   if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
7909
7910   Scale2Ratio(horiz_mode, &hr, &hs);
7911   Scale2Ratio(vert_mode, &vr, &vs);
7912
7913   // always go to the next whole number
7914   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
7915   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
7916   if (cm->current_video_frame) {
7917     assert(cm->width <= cpi->initial_width);
7918     assert(cm->height <= cpi->initial_height);
7919   }
7920
7921   update_frame_size(cpi);
7922
7923   return 0;
7924 }
7925
7926 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
7927                          unsigned int height) {
7928   VP9_COMMON *cm = &cpi->common;
7929 #if CONFIG_VP9_HIGHBITDEPTH
7930   update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
7931 #else
7932   update_initial_width(cpi, 0, 1, 1);
7933 #endif  // CONFIG_VP9_HIGHBITDEPTH
7934
7935 #if CONFIG_VP9_TEMPORAL_DENOISING
7936   setup_denoiser_buffer(cpi);
7937 #endif
7938   alloc_raw_frame_buffers(cpi);
7939   if (width) {
7940     cm->width = width;
7941     if (cm->width > cpi->initial_width) {
7942       cm->width = cpi->initial_width;
7943       printf("Warning: Desired width too large, changed to %d\n", cm->width);
7944     }
7945   }
7946
7947   if (height) {
7948     cm->height = height;
7949     if (cm->height > cpi->initial_height) {
7950       cm->height = cpi->initial_height;
7951       printf("Warning: Desired height too large, changed to %d\n", cm->height);
7952     }
7953   }
7954   assert(cm->width <= cpi->initial_width);
7955   assert(cm->height <= cpi->initial_height);
7956
7957   update_frame_size(cpi);
7958
7959   return 0;
7960 }
7961
7962 void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
7963   cpi->use_svc = use_svc;
7964   return;
7965 }
7966
7967 int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
7968
7969 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
7970   if (flags &
7971       (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) {
7972     int ref = 7;
7973
7974     if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG;
7975
7976     if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG;
7977
7978     if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG;
7979
7980     vp9_use_as_reference(cpi, ref);
7981   }
7982
7983   if (flags &
7984       (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
7985        VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) {
7986     int upd = 7;
7987
7988     if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG;
7989
7990     if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
7991
7992     if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG;
7993
7994     vp9_update_reference(cpi, upd);
7995   }
7996
7997   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
7998     vp9_update_entropy(cpi, 0);
7999   }
8000 }
8001
8002 void vp9_set_row_mt(VP9_COMP *cpi) {
8003   // Enable row based multi-threading for supported modes of encoding
8004   cpi->row_mt = 0;
8005   if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
8006        cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
8007       cpi->oxcf.row_mt && !cpi->use_svc)
8008     cpi->row_mt = 1;
8009
8010   if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
8011       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
8012       !cpi->use_svc)
8013     cpi->row_mt = 1;
8014
8015   // In realtime mode, enable row based multi-threading for all the speed levels
8016   // where non-rd path is used.
8017   if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) {
8018     cpi->row_mt = 1;
8019   }
8020
8021   if (cpi->row_mt)
8022     cpi->row_mt_bit_exact = 1;
8023   else
8024     cpi->row_mt_bit_exact = 0;
8025 }