granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encoder.c

   1 /*
   2  * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <limits.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_config.h"
  18 #include "./vpx_dsp_rtcd.h"
  19 #include "./vpx_scale_rtcd.h"
  20 #include "vpx_dsp/psnr.h"
  21 #include "vpx_dsp/vpx_dsp_common.h"
  22 #include "vpx_dsp/vpx_filter.h"
  23 #if CONFIG_INTERNAL_STATS
  24 #include "vpx_dsp/ssim.h"
  25 #endif
  26 #include "vpx_ports/mem.h"
  27 #include "vpx_ports/system_state.h"
  28 #include "vpx_ports/vpx_timer.h"
  29 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  30 #include "vpx_util/vpx_debug_util.h"
  31 #endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  32
  33 #include "vp9/common/vp9_alloccommon.h"
  34 #include "vp9/common/vp9_filter.h"
  35 #include "vp9/common/vp9_idct.h"
  36 #if CONFIG_NON_GREEDY_MV
  37 #include "vp9/common/vp9_mvref_common.h"
  38 #endif
  39 #if CONFIG_VP9_POSTPROC
  40 #include "vp9/common/vp9_postproc.h"
  41 #endif
  42 #include "vp9/common/vp9_reconinter.h"
  43 #include "vp9/common/vp9_reconintra.h"
  44 #include "vp9/common/vp9_tile_common.h"
  45 #include "vp9/common/vp9_scan.h"
  46
  47 #if !CONFIG_REALTIME_ONLY
  48 #include "vp9/encoder/vp9_alt_ref_aq.h"
  49 #include "vp9/encoder/vp9_aq_360.h"
  50 #include "vp9/encoder/vp9_aq_complexity.h"
  51 #endif
  52 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  53 #if !CONFIG_REALTIME_ONLY
  54 #include "vp9/encoder/vp9_aq_variance.h"
  55 #endif
  56 #include "vp9/encoder/vp9_bitstream.h"
  57 #if CONFIG_INTERNAL_STATS
  58 #include "vp9/encoder/vp9_blockiness.h"
  59 #endif
  60 #include "vp9/encoder/vp9_context_tree.h"
  61 #include "vp9/encoder/vp9_encodeframe.h"
  62 #include "vp9/encoder/vp9_encodemb.h"
  63 #include "vp9/encoder/vp9_encodemv.h"
  64 #include "vp9/encoder/vp9_encoder.h"
  65 #include "vp9/encoder/vp9_ethread.h"
  66 #include "vp9/encoder/vp9_extend.h"
  67 #include "vp9/encoder/vp9_firstpass.h"
  68 #include "vp9/encoder/vp9_mbgraph.h"
  69 #if CONFIG_NON_GREEDY_MV
  70 #include "vp9/encoder/vp9_mcomp.h"
  71 #endif
  72 #include "vp9/encoder/vp9_multi_thread.h"
  73 #include "vp9/encoder/vp9_noise_estimate.h"
  74 #include "vp9/encoder/vp9_picklpf.h"
  75 #include "vp9/encoder/vp9_ratectrl.h"
  76 #include "vp9/encoder/vp9_rd.h"
  77 #include "vp9/encoder/vp9_resize.h"
  78 #include "vp9/encoder/vp9_segmentation.h"
  79 #include "vp9/encoder/vp9_skin_detection.h"
  80 #include "vp9/encoder/vp9_speed_features.h"
  81 #include "vp9/encoder/vp9_svc_layercontext.h"
  82 #include "vp9/encoder/vp9_temporal_filter.h"
  83 #include "vp9/vp9_cx_iface.h"
  84
// Segment ids used by the active map code in this file. Blocks flagged as
// inactive are moved to AM_SEGMENT_ID_INACTIVE, for which apply_active_map()
// enables the skip feature and a loop filter delta of -MAX_LOOP_FILTER; all
// other blocks use AM_SEGMENT_ID_ACTIVE.
#define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0

// Whether to use high precision mv for altref computation.
#define ALTREF_HIGH_PRECISION_MV 1

// Q threshold for high precision mv. Choose a very high value for now so that
// HIGH_PRECISION is always chosen.
#define HIGH_PRECISION_MV_QTHRESH 200

// Weights applied to the frame size and bitrate terms in
// compute_context_model_thresh().
#define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
#define FRAME_RATE_FACTOR 8
  97
// Optional debug output streams. Each handle only exists when the matching
// OUTPUT_YUV_* macro is defined at build time.
#ifdef OUTPUT_YUV_DENOISED
FILE *yuv_denoised_file = NULL;
#endif
#ifdef OUTPUT_YUV_SKINMAP
static FILE *yuv_skinmap_file = NULL;
#endif
#ifdef OUTPUT_YUV_REC
FILE *yuv_rec_file;
#endif
#ifdef OUTPUT_YUV_SVC_SRC
FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
#endif

// Compiled out: these statistics file handles are never built.
#if 0
FILE *framepsnr;
FILE *kf_list;
FILE *keyfile;
#endif
 116
 117 #ifdef ENABLE_KF_DENOISE
 118 // Test condition for spatial denoise of source.
 119 static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 120   VP9_COMMON *const cm = &cpi->common;
 121   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 122
 123   return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) &&
 124          frame_is_intra_only(cm);
 125 }
 126 #endif
 127
// Prototypes for transform helpers whose definitions are not in this part of
// the file. The high bit depth variant is only declared when
// CONFIG_VP9_HIGHBITDEPTH is enabled.
// NOTE(review): the names suggest forward Walsh-Hadamard transforms writing
// into coeff — confirm against the definitions.
#if CONFIG_VP9_HIGHBITDEPTH
void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                         TX_SIZE tx_size);
#endif
void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                  TX_SIZE tx_size);
 134
 135 #if !CONFIG_REALTIME_ONLY
 136 // compute adaptive threshold for skip recoding
 137 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
 138   const VP9_COMMON *const cm = &cpi->common;
 139   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 140   const int frame_size = (cm->width * cm->height) >> 10;
 141   const int bitrate = (int)(oxcf->target_bandwidth >> 10);
 142   const int qindex_factor = cm->base_qindex + (MAXQ >> 1);
 143
 144   // This equation makes the threshold adaptive to frame size.
 145   // Coding gain obtained by recoding comes from alternate frames of large
 146   // content change. We skip recoding if the difference of previous and current
 147   // frame context probability model is less than a certain threshold.
 148   // The first component is the most critical part to guarantee adaptivity.
 149   // Other parameters are estimated based on normal setting of hd resolution
 150   // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50
 151   const int thresh =
 152       ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) *
 153        qindex_factor) >>
 154       9;
 155
 156   return thresh;
 157 }
 158
// compute the total cost difference between current
// and previous frame context prob model.
//
// The previous model is cm->frame_contexts[cm->frame_context_idx] and the
// current model is cm->fc. For every probability table handled below, each
// symbol count taken from cm->counts is multiplied by
// (previous probability - current probability) and added to diff. The last
// count of each table is paired with MAX_PROB minus the last probability
// entry of that table. The function returns -diff, i.e. the count-weighted
// sum of (current - previous).
static int compute_context_model_diff(const VP9_COMMON *const cm) {
  const FRAME_CONTEXT *const pre_fc =
      &cm->frame_contexts[cm->frame_context_idx];
  const FRAME_CONTEXT *const cur_fc = cm->fc;
  const FRAME_COUNTS *counts = &cm->counts;
  // Scratch values holding MAX_PROB - <last probability entry> for the
  // previous and current model respectively.
  vpx_prob pre_last_prob, cur_last_prob;
  int diff = 0;
  int i, j, k, l, m, n;

  // y_mode_prob
  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
    for (j = 0; j < INTRA_MODES - 1; ++j) {
      diff += (int)counts->y_mode[i][j] *
              (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]);
    }
    pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2];
    cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2];

    diff += (int)counts->y_mode[i][INTRA_MODES - 1] *
            (pre_last_prob - cur_last_prob);
  }

  // uv_mode_prob
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES - 1; ++j) {
      diff += (int)counts->uv_mode[i][j] *
              (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]);
    }
    pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2];
    cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2];

    diff += (int)counts->uv_mode[i][INTRA_MODES - 1] *
            (pre_last_prob - cur_last_prob);
  }

  // partition_prob
  for (i = 0; i < PARTITION_CONTEXTS; ++i) {
    for (j = 0; j < PARTITION_TYPES - 1; ++j) {
      diff += (int)counts->partition[i][j] *
              (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]);
    }
    pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2];
    cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2];

    diff += (int)counts->partition[i][PARTITION_TYPES - 1] *
            (pre_last_prob - cur_last_prob);
  }

  // coef_probs
  for (i = 0; i < TX_SIZES; ++i) {
    for (j = 0; j < PLANE_TYPES; ++j) {
      for (k = 0; k < REF_TYPES; ++k) {
        for (l = 0; l < COEF_BANDS; ++l) {
          for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) {
            for (n = 0; n < UNCONSTRAINED_NODES; ++n) {
              diff += (int)counts->coef[i][j][k][l][m][n] *
                      (pre_fc->coef_probs[i][j][k][l][m][n] -
                       cur_fc->coef_probs[i][j][k][l][m][n]);
            }

            pre_last_prob =
                MAX_PROB -
                pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
            cur_last_prob =
                MAX_PROB -
                cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];

            diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] *
                    (pre_last_prob - cur_last_prob);
          }
        }
      }
    }
  }

  // switchable_interp_prob
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) {
      diff += (int)counts->switchable_interp[i][j] *
              (pre_fc->switchable_interp_prob[i][j] -
               cur_fc->switchable_interp_prob[i][j]);
    }
    pre_last_prob =
        MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
    cur_last_prob =
        MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];

    diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] *
            (pre_last_prob - cur_last_prob);
  }

  // inter_mode_probs
  for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
    for (j = 0; j < INTER_MODES - 1; ++j) {
      diff += (int)counts->inter_mode[i][j] *
              (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]);
    }
    pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2];
    cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2];

    diff += (int)counts->inter_mode[i][INTER_MODES - 1] *
            (pre_last_prob - cur_last_prob);
  }

  // intra_inter_prob
  for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
    diff += (int)counts->intra_inter[i][0] *
            (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]);

    pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i];
    cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i];

    diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob);
  }

  // comp_inter_prob
  for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
    diff += (int)counts->comp_inter[i][0] *
            (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]);

    pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i];
    cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i];

    diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob);
  }

  // single_ref_prob
  for (i = 0; i < REF_CONTEXTS; ++i) {
    for (j = 0; j < 2; ++j) {
      diff += (int)counts->single_ref[i][j][0] *
              (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]);

      pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j];
      cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j];

      diff +=
          (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob);
    }
  }

  // comp_ref_prob
  for (i = 0; i < REF_CONTEXTS; ++i) {
    diff += (int)counts->comp_ref[i][0] *
            (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]);

    pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i];
    cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i];

    diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob);
  }

  // tx_probs
  // Each smaller table is walked with one fewer entry than the one before it,
  // hence the decreasing TX_SIZES - k bounds and indices.
  for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
    // p32x32
    for (j = 0; j < TX_SIZES - 1; ++j) {
      diff += (int)counts->tx.p32x32[i][j] *
              (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]);
    }
    pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2];
    cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2];

    diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] *
            (pre_last_prob - cur_last_prob);

    // p16x16
    for (j = 0; j < TX_SIZES - 2; ++j) {
      diff += (int)counts->tx.p16x16[i][j] *
              (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]);
    }
    pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3];
    cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3];

    diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] *
            (pre_last_prob - cur_last_prob);

    // p8x8
    for (j = 0; j < TX_SIZES - 3; ++j) {
      diff += (int)counts->tx.p8x8[i][j] *
              (pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]);
    }
    pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4];
    cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4];

    diff +=
        (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob);
  }

  // skip_probs
  for (i = 0; i < SKIP_CONTEXTS; ++i) {
    diff += (int)counts->skip[i][0] *
            (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]);

    pre_last_prob = MAX_PROB - pre_fc->skip_probs[i];
    cur_last_prob = MAX_PROB - cur_fc->skip_probs[i];

    diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob);
  }

  // mv
  for (i = 0; i < MV_JOINTS - 1; ++i) {
    diff += (int)counts->mv.joints[i] *
            (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]);
  }
  pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2];
  cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2];

  diff +=
      (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob);

  // Both entries of the mv component arrays (indices 0 and 1) are processed
  // the same way.
  for (i = 0; i < 2; ++i) {
    const nmv_component_counts *nmv_count = &counts->mv.comps[i];
    const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i];
    const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i];

    // sign
    diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign);

    pre_last_prob = MAX_PROB - pre_nmv_prob->sign;
    cur_last_prob = MAX_PROB - cur_nmv_prob->sign;

    diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob);

    // classes
    for (j = 0; j < MV_CLASSES - 1; ++j) {
      diff += (int)nmv_count->classes[j] *
              (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]);
    }
    pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2];
    cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2];

    diff += (int)nmv_count->classes[MV_CLASSES - 1] *
            (pre_last_prob - cur_last_prob);

    // class0
    for (j = 0; j < CLASS0_SIZE - 1; ++j) {
      diff += (int)nmv_count->class0[j] *
              (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]);
    }
    pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2];
    cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2];

    diff += (int)nmv_count->class0[CLASS0_SIZE - 1] *
            (pre_last_prob - cur_last_prob);

    // bits
    for (j = 0; j < MV_OFFSET_BITS; ++j) {
      diff += (int)nmv_count->bits[j][0] *
              (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]);

      pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j];
      cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j];

      diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob);
    }

    // class0_fp
    for (j = 0; j < CLASS0_SIZE; ++j) {
      for (k = 0; k < MV_FP_SIZE - 1; ++k) {
        diff += (int)nmv_count->class0_fp[j][k] *
                (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]);
      }
      pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
      cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];

      diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] *
              (pre_last_prob - cur_last_prob);
    }

    // fp
    for (j = 0; j < MV_FP_SIZE - 1; ++j) {
      diff +=
          (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]);
    }
    pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2];
    cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2];

    diff +=
        (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob);

    // class0_hp
    diff += (int)nmv_count->class0_hp[0] *
            (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp);

    pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp;
    cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp;

    diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob);

    // hp
    diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp);

    pre_last_prob = MAX_PROB - pre_nmv_prob->hp;
    cur_last_prob = MAX_PROB - cur_nmv_prob->hp;

    diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob);
  }

  // diff was accumulated as (previous - current); the sign is flipped so the
  // returned value is expressed as (current - previous).
  return -diff;
}
 460 #endif  // !CONFIG_REALTIME_ONLY
 461
 462 // Test for whether to calculate metrics for the frame.
 463 static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
 464   const VP9_COMMON *const cm = &cpi->common;
 465   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 466
 467   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
 468 }
 469
 470 /* clang-format off */
// Table of per-level limits, listed from LEVEL_1 up to LEVEL_6_2.
// vp9_get_level() walks it front to back and reports the first row whose
// limits are all satisfied.
// NOTE(review): the four unlabelled trailing columns presumably correspond to
// compression_ratio, max_col_tiles, min_altref_distance and
// max_ref_frame_buffers (the remaining fields compared in vp9_get_level()) —
// confirm against the Vp9LevelSpec declaration.
const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
  //         sample rate    size   breadth  bitrate  cpb
  { LEVEL_1,   829440,      36864,    512,   200,    400,    2, 1,  4,  8 },
  { LEVEL_1_1, 2764800,     73728,    768,   800,    1000,   2, 1,  4,  8 },
  { LEVEL_2,   4608000,     122880,   960,   1800,   1500,   2, 1,  4,  8 },
  { LEVEL_2_1, 9216000,     245760,   1344,  3600,   2800,   2, 2,  4,  8 },
  { LEVEL_3,   20736000,    552960,   2048,  7200,   6000,   2, 4,  4,  8 },
  { LEVEL_3_1, 36864000,    983040,   2752,  12000,  10000,  2, 4,  4,  8 },
  { LEVEL_4,   83558400,    2228224,  4160,  18000,  16000,  4, 4,  4,  8 },
  { LEVEL_4_1, 160432128,   2228224,  4160,  30000,  18000,  4, 4,  5,  6 },
  { LEVEL_5,   311951360,   8912896,  8384,  60000,  36000,  6, 8,  6,  4 },
  { LEVEL_5_1, 588251136,   8912896,  8384,  120000, 46000,  8, 8,  10, 4 },
  // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
  // they are finalized (currently tentative).
  { LEVEL_5_2, 1176502272,  8912896,  8384,  180000, 90000,  8, 8,  10, 4 },
  { LEVEL_6,   1176502272,  35651584, 16832, 180000, 90000,  8, 16, 10, 4 },
  { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
  { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
};
 490 /* clang-format on */
 491
// Human-readable explanations for target-level failures, one string per
// failure id (TARGET_LEVEL_FAIL_IDS entries in total).
// NOTE(review): the order presumably has to match the failure id enumeration
// declared elsewhere — confirm before reordering or inserting entries.
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
  "The average bit-rate is too high.",
  "The picture size is too large.",
  "The picture width/height is too large.",
  "The luma sample rate is too large.",
  "The CPB size is too large.",
  "The compression ratio is too small",
  "Too many column tiles are used.",
  "The alt-ref distance is too small.",
  "Too many reference buffers are used."
};
 503
 504 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
 505   switch (mode) {
 506     case NORMAL:
 507       *hr = 1;
 508       *hs = 1;
 509       break;
 510     case FOURFIVE:
 511       *hr = 4;
 512       *hs = 5;
 513       break;
 514     case THREEFIVE:
 515       *hr = 3;
 516       *hs = 5;
 517       break;
 518     default:
 519       assert(mode == ONETWO);
 520       *hr = 1;
 521       *hs = 2;
 522       break;
 523   }
 524 }
 525
 526 // Mark all inactive blocks as active. Other segmentation features may be set
 527 // so memset cannot be used, instead only inactive blocks should be reset.
 528 static void suppress_active_map(VP9_COMP *cpi) {
 529   unsigned char *const seg_map = cpi->segmentation_map;
 530
 531   if (cpi->active_map.enabled || cpi->active_map.update) {
 532     const int rows = cpi->common.mi_rows;
 533     const int cols = cpi->common.mi_cols;
 534     int i;
 535
 536     for (i = 0; i < rows * cols; ++i)
 537       if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
 538         seg_map[i] = AM_SEGMENT_ID_ACTIVE;
 539   }
 540 }
 541
 542 static void apply_active_map(VP9_COMP *cpi) {
 543   struct segmentation *const seg = &cpi->common.seg;
 544   unsigned char *const seg_map = cpi->segmentation_map;
 545   const unsigned char *const active_map = cpi->active_map.map;
 546   int i;
 547
 548   assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
 549
 550   if (frame_is_intra_only(&cpi->common)) {
 551     cpi->active_map.enabled = 0;
 552     cpi->active_map.update = 1;
 553   }
 554
 555   if (cpi->active_map.update) {
 556     if (cpi->active_map.enabled) {
 557       for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
 558         if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
 559       vp9_enable_segmentation(seg);
 560       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 561       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 562       // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
 563       // filter level being zero regardless of the value of seg->abs_delta.
 564       vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
 565                       -MAX_LOOP_FILTER);
 566     } else {
 567       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 568       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 569       if (seg->enabled) {
 570         seg->update_data = 1;
 571         seg->update_map = 1;
 572       }
 573     }
 574     cpi->active_map.update = 0;
 575   }
 576 }
 577
 578 static void apply_roi_map(VP9_COMP *cpi) {
 579   VP9_COMMON *cm = &cpi->common;
 580   struct segmentation *const seg = &cm->seg;
 581   vpx_roi_map_t *roi = &cpi->roi;
 582   const int *delta_q = roi->delta_q;
 583   const int *delta_lf = roi->delta_lf;
 584   const int *skip = roi->skip;
 585   int ref_frame[8];
 586   int internal_delta_q[MAX_SEGMENTS];
 587   int i;
 588   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
 589                                     VP9_ALT_FLAG };
 590
 591   // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
 592   // realtime mode.
 593   if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
 594   if (!roi->enabled) return;
 595
 596   memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
 597
 598   vp9_enable_segmentation(seg);
 599   vp9_clearall_segfeatures(seg);
 600   // Select delta coding method;
 601   seg->abs_delta = SEGMENT_DELTADATA;
 602
 603   memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
 604
 605   for (i = 0; i < MAX_SEGMENTS; ++i) {
 606     // Translate the external delta q values to internal values.
 607     internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
 608     if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
 609     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
 610     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
 611     if (internal_delta_q[i] != 0) {
 612       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
 613       vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
 614     }
 615     if (delta_lf[i] != 0) {
 616       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
 617       vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
 618     }
 619     if (skip[i] != 0) {
 620       vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
 621       vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
 622     }
 623     if (ref_frame[i] >= 0) {
 624       int valid_ref = 1;
 625       // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
 626       if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
 627         valid_ref = 0;
 628       // If GOLDEN is selected, make sure it's set as reference.
 629       if (ref_frame[i] == GOLDEN_FRAME &&
 630           !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
 631         valid_ref = 0;
 632       }
 633       // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
 634       // same reference.
 635       if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
 636         ref_frame[i] = LAST_FRAME;
 637       if (valid_ref) {
 638         vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
 639         vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
 640       }
 641     }
 642   }
 643   roi->enabled = 1;
 644 }
 645
 646 static void init_level_info(Vp9LevelInfo *level_info) {
 647   Vp9LevelStats *const level_stats = &level_info->level_stats;
 648   Vp9LevelSpec *const level_spec = &level_info->level_spec;
 649
 650   memset(level_stats, 0, sizeof(*level_stats));
 651   memset(level_spec, 0, sizeof(*level_spec));
 652   level_spec->level = LEVEL_UNKNOWN;
 653   level_spec->min_altref_distance = INT_MAX;
 654 }
 655
 656 static int check_seg_range(int seg_data[8], int range) {
 657   return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
 658            abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
 659            abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
 660            abs(seg_data[6]) > range || abs(seg_data[7]) > range);
 661 }
 662
 663 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
 664   int i;
 665   const Vp9LevelSpec *this_level;
 666
 667   vpx_clear_system_state();
 668
 669   for (i = 0; i < VP9_LEVELS; ++i) {
 670     this_level = &vp9_level_defs[i];
 671     if ((double)level_spec->max_luma_sample_rate >
 672             (double)this_level->max_luma_sample_rate *
 673                 (1 + SAMPLE_RATE_GRACE_P) ||
 674         level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
 675         level_spec->max_luma_picture_breadth >
 676             this_level->max_luma_picture_breadth ||
 677         level_spec->average_bitrate > this_level->average_bitrate ||
 678         level_spec->max_cpb_size > this_level->max_cpb_size ||
 679         level_spec->compression_ratio < this_level->compression_ratio ||
 680         level_spec->max_col_tiles > this_level->max_col_tiles ||
 681         level_spec->min_altref_distance < this_level->min_altref_distance ||
 682         level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
 683       continue;
 684     break;
 685   }
 686   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 687 }
 688
 689 int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
 690                     unsigned int cols, int delta_q[8], int delta_lf[8],
 691                     int skip[8], int ref_frame[8]) {
 692   VP9_COMMON *cm = &cpi->common;
 693   vpx_roi_map_t *roi = &cpi->roi;
 694   const int range = 63;
 695   const int ref_frame_range = 3;  // Alt-ref
 696   const int skip_range = 1;
 697   const int frame_rows = cpi->common.mi_rows;
 698   const int frame_cols = cpi->common.mi_cols;
 699
 700   // Check number of rows and columns match
 701   if (frame_rows != (int)rows || frame_cols != (int)cols) {
 702     return -1;
 703   }
 704
 705   if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
 706       !check_seg_range(ref_frame, ref_frame_range) ||
 707       !check_seg_range(skip, skip_range))
 708     return -1;
 709
 710   // Also disable segmentation if no deltas are specified.
 711   if (!map ||
 712       (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
 713          delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
 714          delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
 715          delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
 716          skip[5] | skip[6] | skip[7]) &&
 717        (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
 718         ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
 719         ref_frame[6] == -1 && ref_frame[7] == -1))) {
 720     vp9_disable_segmentation(&cm->seg);
 721     cpi->roi.enabled = 0;
 722     return 0;
 723   }
 724
 725   if (roi->roi_map) {
 726     vpx_free(roi->roi_map);
 727     roi->roi_map = NULL;
 728   }
 729   CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
 730
 731   // Copy to ROI structure in the compressor.
 732   memcpy(roi->roi_map, map, rows * cols);
 733   memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
 734   memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
 735   memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
 736   memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
 737   roi->enabled = 1;
 738   roi->rows = rows;
 739   roi->cols = cols;
 740
 741   return 0;
 742 }
 743
 744 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 745                        int cols) {
 746   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
 747     unsigned char *const active_map_8x8 = cpi->active_map.map;
 748     const int mi_rows = cpi->common.mi_rows;
 749     const int mi_cols = cpi->common.mi_cols;
 750     cpi->active_map.update = 1;
 751     if (new_map_16x16) {
 752       int r, c;
 753       for (r = 0; r < mi_rows; ++r) {
 754         for (c = 0; c < mi_cols; ++c) {
 755           active_map_8x8[r * mi_cols + c] =
 756               new_map_16x16[(r >> 1) * cols + (c >> 1)]
 757                   ? AM_SEGMENT_ID_ACTIVE
 758                   : AM_SEGMENT_ID_INACTIVE;
 759         }
 760       }
 761       cpi->active_map.enabled = 1;
 762     } else {
 763       cpi->active_map.enabled = 0;
 764     }
 765     return 0;
 766   } else {
 767     return -1;
 768   }
 769 }
 770
 771 int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 772                        int cols) {
 773   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
 774       new_map_16x16) {
 775     unsigned char *const seg_map_8x8 = cpi->segmentation_map;
 776     const int mi_rows = cpi->common.mi_rows;
 777     const int mi_cols = cpi->common.mi_cols;
 778     memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
 779     if (cpi->active_map.enabled) {
 780       int r, c;
 781       for (r = 0; r < mi_rows; ++r) {
 782         for (c = 0; c < mi_cols; ++c) {
 783           // Cyclic refresh segments are considered active despite not having
 784           // AM_SEGMENT_ID_ACTIVE
 785           new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
 786               seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
 787         }
 788       }
 789     }
 790     return 0;
 791   } else {
 792     return -1;
 793   }
 794 }
 795
 796 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
 797   MACROBLOCK *const mb = &cpi->td.mb;
 798   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
 799   if (cpi->common.allow_high_precision_mv) {
 800     mb->mvcost = mb->nmvcost_hp;
 801     mb->mvsadcost = mb->nmvsadcost_hp;
 802   } else {
 803     mb->mvcost = mb->nmvcost;
 804     mb->mvsadcost = mb->nmvsadcost;
 805   }
 806 }
 807
 808 static void setup_frame(VP9_COMP *cpi) {
 809   VP9_COMMON *const cm = &cpi->common;
 810   // Set up entropy context depending on frame type. The decoder mandates
 811   // the use of the default context, index 0, for keyframes and inter
 812   // frames where the error_resilient_mode or intra_only flag is set. For
 813   // other inter-frames the encoder currently uses only two contexts;
 814   // context 1 for ALTREF frames and context 0 for the others.
 815   if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
 816     vp9_setup_past_independence(cm);
 817   } else {
 818     if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
 819   }
 820
 821   // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF
 822   // case. Need some further investigation on if we could apply this to single
 823   // layer ARF case as well.
 824   if (cpi->multi_layer_arf && !cpi->use_svc) {
 825     GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 826     const int gf_group_index = gf_group->index;
 827     const int boost_frame =
 828         !cpi->rc.is_src_frame_alt_ref &&
 829         (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
 830
 831     // frame_context_idx           Frame Type
 832     //        0              Intra only frame, base layer ARF
 833     //        1              ARFs with layer depth = 2,3
 834     //        2              ARFs with layer depth > 3
 835     //        3              Non-boosted frames
 836     if (frame_is_intra_only(cm)) {
 837       cm->frame_context_idx = 0;
 838     } else if (boost_frame) {
 839       if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
 840         cm->frame_context_idx = 0;
 841       else if (gf_group->layer_depth[gf_group_index] <= 3)
 842         cm->frame_context_idx = 1;
 843       else
 844         cm->frame_context_idx = 2;
 845     } else {
 846       cm->frame_context_idx = 3;
 847     }
 848   }
 849
 850   if (cm->frame_type == KEY_FRAME) {
 851     cpi->refresh_golden_frame = 1;
 852     cpi->refresh_alt_ref_frame = 1;
 853     vp9_zero(cpi->interp_filter_selected);
 854   } else {
 855     *cm->fc = cm->frame_contexts[cm->frame_context_idx];
 856     vp9_zero(cpi->interp_filter_selected[0]);
 857   }
 858 }
 859
 860 static void vp9_enc_setup_mi(VP9_COMMON *cm) {
 861   int i;
 862   cm->mi = cm->mip + cm->mi_stride + 1;
 863   memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
 864   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 865   // Clear top border row
 866   memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
 867   // Clear left border column
 868   for (i = 1; i < cm->mi_rows + 1; ++i)
 869     memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
 870
 871   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 872   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 873
 874   memset(cm->mi_grid_base, 0,
 875          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 876 }
 877
 878 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
 879   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
 880   if (!cm->mip) return 1;
 881   cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
 882   if (!cm->prev_mip) return 1;
 883   cm->mi_alloc_size = mi_size;
 884
 885   cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 886   if (!cm->mi_grid_base) return 1;
 887   cm->prev_mi_grid_base =
 888       (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 889   if (!cm->prev_mi_grid_base) return 1;
 890
 891   return 0;
 892 }
 893
 894 static void vp9_enc_free_mi(VP9_COMMON *cm) {
 895   vpx_free(cm->mip);
 896   cm->mip = NULL;
 897   vpx_free(cm->prev_mip);
 898   cm->prev_mip = NULL;
 899   vpx_free(cm->mi_grid_base);
 900   cm->mi_grid_base = NULL;
 901   vpx_free(cm->prev_mi_grid_base);
 902   cm->prev_mi_grid_base = NULL;
 903   cm->mi_alloc_size = 0;
 904 }
 905
 906 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
 907   // Current mip will be the prev_mip for the next frame.
 908   MODE_INFO **temp_base = cm->prev_mi_grid_base;
 909   MODE_INFO *temp = cm->prev_mip;
 910
 911   // Skip update prev_mi frame in show_existing_frame mode.
 912   if (cm->show_existing_frame) return;
 913
 914   cm->prev_mip = cm->mip;
 915   cm->mip = temp;
 916
 917   // Update the upper left visible macroblock ptrs.
 918   cm->mi = cm->mip + cm->mi_stride + 1;
 919   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 920
 921   cm->prev_mi_grid_base = cm->mi_grid_base;
 922   cm->mi_grid_base = temp_base;
 923   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 924   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 925 }
 926
 927 void vp9_initialize_enc(void) {
 928   static volatile int init_done = 0;
 929
 930   if (!init_done) {
 931     vp9_rtcd();
 932     vpx_dsp_rtcd();
 933     vpx_scale_rtcd();
 934     vp9_init_intra_predictors();
 935     vp9_init_me_luts();
 936     vp9_rc_init_minq_luts();
 937     vp9_entropy_mv_init();
 938 #if !CONFIG_REALTIME_ONLY
 939     vp9_temporal_filter_init();
 940 #endif
 941     init_done = 1;
 942   }
 943 }
 944
 945 static void dealloc_compressor_data(VP9_COMP *cpi) {
 946   VP9_COMMON *const cm = &cpi->common;
 947   int i;
 948
 949   vpx_free(cpi->mbmi_ext_base);
 950   cpi->mbmi_ext_base = NULL;
 951
 952   vpx_free(cpi->tile_data);
 953   cpi->tile_data = NULL;
 954
 955   vpx_free(cpi->segmentation_map);
 956   cpi->segmentation_map = NULL;
 957   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
 958   cpi->coding_context.last_frame_seg_map_copy = NULL;
 959
 960   vpx_free(cpi->nmvcosts[0]);
 961   vpx_free(cpi->nmvcosts[1]);
 962   cpi->nmvcosts[0] = NULL;
 963   cpi->nmvcosts[1] = NULL;
 964
 965   vpx_free(cpi->nmvcosts_hp[0]);
 966   vpx_free(cpi->nmvcosts_hp[1]);
 967   cpi->nmvcosts_hp[0] = NULL;
 968   cpi->nmvcosts_hp[1] = NULL;
 969
 970   vpx_free(cpi->nmvsadcosts[0]);
 971   vpx_free(cpi->nmvsadcosts[1]);
 972   cpi->nmvsadcosts[0] = NULL;
 973   cpi->nmvsadcosts[1] = NULL;
 974
 975   vpx_free(cpi->nmvsadcosts_hp[0]);
 976   vpx_free(cpi->nmvsadcosts_hp[1]);
 977   cpi->nmvsadcosts_hp[0] = NULL;
 978   cpi->nmvsadcosts_hp[1] = NULL;
 979
 980   vpx_free(cpi->skin_map);
 981   cpi->skin_map = NULL;
 982
 983   vpx_free(cpi->prev_partition);
 984   cpi->prev_partition = NULL;
 985
 986   vpx_free(cpi->svc.prev_partition_svc);
 987   cpi->svc.prev_partition_svc = NULL;
 988
 989   vpx_free(cpi->prev_segment_id);
 990   cpi->prev_segment_id = NULL;
 991
 992   vpx_free(cpi->prev_variance_low);
 993   cpi->prev_variance_low = NULL;
 994
 995   vpx_free(cpi->copied_frame_cnt);
 996   cpi->copied_frame_cnt = NULL;
 997
 998   vpx_free(cpi->content_state_sb_fd);
 999   cpi->content_state_sb_fd = NULL;
1000
1001   vpx_free(cpi->count_arf_frame_usage);
1002   cpi->count_arf_frame_usage = NULL;
1003   vpx_free(cpi->count_lastgolden_frame_usage);
1004   cpi->count_lastgolden_frame_usage = NULL;
1005
1006   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1007   cpi->cyclic_refresh = NULL;
1008
1009   vpx_free(cpi->active_map.map);
1010   cpi->active_map.map = NULL;
1011
1012   vpx_free(cpi->roi.roi_map);
1013   cpi->roi.roi_map = NULL;
1014
1015   vpx_free(cpi->consec_zero_mv);
1016   cpi->consec_zero_mv = NULL;
1017
1018   vpx_free(cpi->mb_wiener_variance);
1019   cpi->mb_wiener_variance = NULL;
1020
1021   vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
1022   cpi->mi_ssim_rdmult_scaling_factors = NULL;
1023
1024 #if CONFIG_RATE_CTRL
1025   free_partition_info(cpi);
1026   free_motion_vector_info(cpi);
1027   free_fp_motion_vector_info(cpi);
1028   free_tpl_stats_info(cpi);
1029 #endif
1030
1031   vp9_free_ref_frame_buffers(cm->buffer_pool);
1032 #if CONFIG_VP9_POSTPROC
1033   vp9_free_postproc_buffers(cm);
1034 #endif
1035   vp9_free_context_buffers(cm);
1036
1037   vpx_free_frame_buffer(&cpi->last_frame_uf);
1038   vpx_free_frame_buffer(&cpi->scaled_source);
1039   vpx_free_frame_buffer(&cpi->scaled_last_source);
1040   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
1041 #ifdef ENABLE_KF_DENOISE
1042   vpx_free_frame_buffer(&cpi->raw_unscaled_source);
1043   vpx_free_frame_buffer(&cpi->raw_scaled_source);
1044 #endif
1045
1046   vp9_lookahead_destroy(cpi->lookahead);
1047
1048   vpx_free(cpi->tile_tok[0][0]);
1049   cpi->tile_tok[0][0] = 0;
1050
1051   vpx_free(cpi->tplist[0][0]);
1052   cpi->tplist[0][0] = NULL;
1053
1054   vp9_free_pc_tree(&cpi->td);
1055
1056   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
1057     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
1058     vpx_free(lc->rc_twopass_stats_in.buf);
1059     lc->rc_twopass_stats_in.buf = NULL;
1060     lc->rc_twopass_stats_in.sz = 0;
1061   }
1062
1063   if (cpi->source_diff_var != NULL) {
1064     vpx_free(cpi->source_diff_var);
1065     cpi->source_diff_var = NULL;
1066   }
1067
1068   for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
1069     vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
1070   }
1071   memset(&cpi->svc.scaled_frames[0], 0,
1072          MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
1073
1074   vpx_free_frame_buffer(&cpi->svc.scaled_temp);
1075   memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
1076
1077   vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
1078   memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
1079
1080   vp9_free_svc_cyclic_refresh(cpi);
1081 }
1082
1083 static void save_coding_context(VP9_COMP *cpi) {
1084   CODING_CONTEXT *const cc = &cpi->coding_context;
1085   VP9_COMMON *cm = &cpi->common;
1086
1087   // Stores a snapshot of key state variables which can subsequently be
1088   // restored with a call to vp9_restore_coding_context. These functions are
1089   // intended for use in a re-code loop in vp9_compress_frame where the
1090   // quantizer value is adjusted between loop iterations.
1091   vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
1092
1093   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
1094          MV_VALS * sizeof(*cpi->nmvcosts[0]));
1095   memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
1096          MV_VALS * sizeof(*cpi->nmvcosts[1]));
1097   memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
1098          MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
1099   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
1100          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
1101
1102   vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
1103
1104   memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
1105          (cm->mi_rows * cm->mi_cols));
1106
1107   vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
1108   vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
1109
1110   cc->fc = *cm->fc;
1111 }
1112
1113 static void restore_coding_context(VP9_COMP *cpi) {
1114   CODING_CONTEXT *const cc = &cpi->coding_context;
1115   VP9_COMMON *cm = &cpi->common;
1116
1117   // Restore key state variables to the snapshot state stored in the
1118   // previous call to vp9_save_coding_context.
1119   vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
1120
1121   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
1122   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
1123   memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
1124          MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
1125   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
1126          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
1127
1128   vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
1129
1130   memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
1131          (cm->mi_rows * cm->mi_cols));
1132
1133   vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
1134   vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
1135
1136   *cm->fc = cc->fc;
1137 }
1138
1139 #if !CONFIG_REALTIME_ONLY
1140 static void configure_static_seg_features(VP9_COMP *cpi) {
1141   VP9_COMMON *const cm = &cpi->common;
1142   const RATE_CONTROL *const rc = &cpi->rc;
1143   struct segmentation *const seg = &cm->seg;
1144
1145   int high_q = (int)(rc->avg_q > 48.0);
1146   int qi_delta;
1147
1148   // Disable and clear down for KF
1149   if (cm->frame_type == KEY_FRAME) {
1150     // Clear down the global segmentation map
1151     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1152     seg->update_map = 0;
1153     seg->update_data = 0;
1154     cpi->static_mb_pct = 0;
1155
1156     // Disable segmentation
1157     vp9_disable_segmentation(seg);
1158
1159     // Clear down the segment features.
1160     vp9_clearall_segfeatures(seg);
1161   } else if (cpi->refresh_alt_ref_frame) {
1162     // If this is an alt ref frame
1163     // Clear down the global segmentation map
1164     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1165     seg->update_map = 0;
1166     seg->update_data = 0;
1167     cpi->static_mb_pct = 0;
1168
1169     // Disable segmentation and individual segment features by default
1170     vp9_disable_segmentation(seg);
1171     vp9_clearall_segfeatures(seg);
1172
1173     // Scan frames from current to arf frame.
1174     // This function re-enables segmentation if appropriate.
1175     vp9_update_mbgraph_stats(cpi);
1176
1177     // If segmentation was enabled set those features needed for the
1178     // arf itself.
1179     if (seg->enabled) {
1180       seg->update_map = 1;
1181       seg->update_data = 1;
1182
1183       qi_delta =
1184           vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
1185       vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
1186       vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1187
1188       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1189       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1190
1191       // Where relevant assume segment data is delta data
1192       seg->abs_delta = SEGMENT_DELTADATA;
1193     }
1194   } else if (seg->enabled) {
1195     // All other frames if segmentation has been enabled
1196
1197     // First normal frame in a valid gf or alt ref group
1198     if (rc->frames_since_golden == 0) {
1199       // Set up segment features for normal frames in an arf group
1200       if (rc->source_alt_ref_active) {
1201         seg->update_map = 0;
1202         seg->update_data = 1;
1203         seg->abs_delta = SEGMENT_DELTADATA;
1204
1205         qi_delta =
1206             vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth);
1207         vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
1208         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1209
1210         vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1211         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1212
1213         // Segment coding disabled for compred testing
1214         if (high_q || (cpi->static_mb_pct == 100)) {
1215           vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1216           vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1217           vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1218         }
1219       } else {
1220         // Disable segmentation and clear down features if alt ref
1221         // is not active for this group
1222
1223         vp9_disable_segmentation(seg);
1224
1225         memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1226
1227         seg->update_map = 0;
1228         seg->update_data = 0;
1229
1230         vp9_clearall_segfeatures(seg);
1231       }
1232     } else if (rc->is_src_frame_alt_ref) {
1233       // Special case where we are coding over the top of a previous
1234       // alt ref frame.
1235       // Segment coding disabled for compred testing
1236
1237       // Enable ref frame features for segment 0 as well
1238       vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
1239       vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1240
1241       // All mbs should use ALTREF_FRAME
1242       vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
1243       vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1244       vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
1245       vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1246
1247       // Skip all MBs if high Q (0,0 mv and skip coeffs)
1248       if (high_q) {
1249         vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
1250         vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1251       }
1252       // Enable data update
1253       seg->update_data = 1;
1254     } else {
1255       // All other frames.
1256
1257       // No updates.. leave things as they are.
1258       seg->update_map = 0;
1259       seg->update_data = 0;
1260     }
1261   }
1262 }
1263 #endif  // !CONFIG_REALTIME_ONLY
1264
1265 static void update_reference_segmentation_map(VP9_COMP *cpi) {
1266   VP9_COMMON *const cm = &cpi->common;
1267   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
1268   uint8_t *cache_ptr = cm->last_frame_seg_map;
1269   int row, col;
1270
1271   for (row = 0; row < cm->mi_rows; row++) {
1272     MODE_INFO **mi_8x8 = mi_8x8_ptr;
1273     uint8_t *cache = cache_ptr;
1274     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
1275       cache[0] = mi_8x8[0]->segment_id;
1276     mi_8x8_ptr += cm->mi_stride;
1277     cache_ptr += cm->mi_cols;
1278   }
1279 }
1280
1281 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
1282   VP9_COMMON *cm = &cpi->common;
1283   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1284
1285   if (!cpi->lookahead)
1286     cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
1287                                         cm->subsampling_x, cm->subsampling_y,
1288 #if CONFIG_VP9_HIGHBITDEPTH
1289                                         cm->use_highbitdepth,
1290 #endif
1291                                         oxcf->lag_in_frames);
1292   if (!cpi->lookahead)
1293     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1294                        "Failed to allocate lag buffers");
1295
1296   // TODO(agrange) Check if ARF is enabled and skip allocation if not.
1297   if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
1298                                cm->subsampling_x, cm->subsampling_y,
1299 #if CONFIG_VP9_HIGHBITDEPTH
1300                                cm->use_highbitdepth,
1301 #endif
1302                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1303                                NULL, NULL, NULL))
1304     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1305                        "Failed to allocate altref buffer");
1306 }
1307
1308 static void alloc_util_frame_buffers(VP9_COMP *cpi) {
1309   VP9_COMMON *const cm = &cpi->common;
1310   if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
1311                                cm->subsampling_x, cm->subsampling_y,
1312 #if CONFIG_VP9_HIGHBITDEPTH
1313                                cm->use_highbitdepth,
1314 #endif
1315                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1316                                NULL, NULL, NULL))
1317     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1318                        "Failed to allocate last frame buffer");
1319
1320   if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
1321                                cm->subsampling_x, cm->subsampling_y,
1322 #if CONFIG_VP9_HIGHBITDEPTH
1323                                cm->use_highbitdepth,
1324 #endif
1325                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1326                                NULL, NULL, NULL))
1327     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1328                        "Failed to allocate scaled source buffer");
1329
1330   // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
1331   // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
1332   // target of 1/4x1/4. number_spatial_layers must be greater than 2.
1333   if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
1334       cpi->svc.number_spatial_layers > 2) {
1335     cpi->svc.scaled_temp_is_alloc = 1;
1336     if (vpx_realloc_frame_buffer(
1337             &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
1338             cm->subsampling_x, cm->subsampling_y,
1339 #if CONFIG_VP9_HIGHBITDEPTH
1340             cm->use_highbitdepth,
1341 #endif
1342             VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
1343       vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
1344                          "Failed to allocate scaled_frame for svc ");
1345   }
1346
1347   if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
1348                                cm->subsampling_x, cm->subsampling_y,
1349 #if CONFIG_VP9_HIGHBITDEPTH
1350                                cm->use_highbitdepth,
1351 #endif
1352                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1353                                NULL, NULL, NULL))
1354     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1355                        "Failed to allocate scaled last source buffer");
1356 #ifdef ENABLE_KF_DENOISE
1357   if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height,
1358                                cm->subsampling_x, cm->subsampling_y,
1359 #if CONFIG_VP9_HIGHBITDEPTH
1360                                cm->use_highbitdepth,
1361 #endif
1362                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1363                                NULL, NULL, NULL))
1364     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1365                        "Failed to allocate unscaled raw source frame buffer");
1366
1367   if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height,
1368                                cm->subsampling_x, cm->subsampling_y,
1369 #if CONFIG_VP9_HIGHBITDEPTH
1370                                cm->use_highbitdepth,
1371 #endif
1372                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1373                                NULL, NULL, NULL))
1374     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1375                        "Failed to allocate scaled raw source frame buffer");
1376 #endif
1377 }
1378
1379 static int alloc_context_buffers_ext(VP9_COMP *cpi) {
1380   VP9_COMMON *cm = &cpi->common;
1381   int mi_size = cm->mi_cols * cm->mi_rows;
1382
1383   cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
1384   if (!cpi->mbmi_ext_base) return 1;
1385
1386   return 0;
1387 }
1388
1389 static void alloc_compressor_data(VP9_COMP *cpi) {
1390   VP9_COMMON *cm = &cpi->common;
1391   int sb_rows;
1392
1393   vp9_alloc_context_buffers(cm, cm->width, cm->height);
1394
1395   alloc_context_buffers_ext(cpi);
1396
1397   vpx_free(cpi->tile_tok[0][0]);
1398
1399   {
1400     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
1401     CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
1402                     vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
1403   }
1404
1405   sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
1406   vpx_free(cpi->tplist[0][0]);
1407   CHECK_MEM_ERROR(
1408       cm, cpi->tplist[0][0],
1409       vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
1410
1411   vp9_setup_pc_tree(&cpi->common, &cpi->td);
1412 }
1413
1414 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
1415   cpi->framerate = framerate < 0.1 ? 30 : framerate;
1416   vp9_rc_update_framerate(cpi);
1417 }
1418
1419 static void set_tile_limits(VP9_COMP *cpi) {
1420   VP9_COMMON *const cm = &cpi->common;
1421
1422   int min_log2_tile_cols, max_log2_tile_cols;
1423   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
1424
1425   cm->log2_tile_cols =
1426       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
1427   cm->log2_tile_rows = cpi->oxcf.tile_rows;
1428
1429   if (cpi->oxcf.target_level == LEVEL_AUTO) {
1430     const int level_tile_cols =
1431         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
1432     if (cm->log2_tile_cols > level_tile_cols) {
1433       cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
1434     }
1435   }
1436 }
1437
1438 static void update_frame_size(VP9_COMP *cpi) {
1439   VP9_COMMON *const cm = &cpi->common;
1440   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
1441
1442   vp9_set_mb_mi(cm, cm->width, cm->height);
1443   vp9_init_context_buffers(cm);
1444   vp9_init_macroblockd(cm, xd, NULL);
1445   cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
1446   memset(cpi->mbmi_ext_base, 0,
1447          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
1448
1449   set_tile_limits(cpi);
1450 }
1451
1452 static void init_buffer_indices(VP9_COMP *cpi) {
1453   int ref_frame;
1454
1455   for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1456     cpi->ref_fb_idx[ref_frame] = ref_frame;
1457
1458   cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
1459   cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
1460   cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
1461 }
1462
1463 static void init_level_constraint(LevelConstraint *lc) {
1464   lc->level_index = -1;
1465   lc->max_cpb_size = INT_MAX;
1466   lc->max_frame_size = INT_MAX;
1467   lc->fail_flag = 0;
1468 }
1469
1470 static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
1471   vpx_clear_system_state();
1472   ls->level_index = level_index;
1473   if (level_index >= 0) {
1474     ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000;
1475   }
1476 }
1477
1478 static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1479   VP9_COMMON *const cm = &cpi->common;
1480
1481   cpi->oxcf = *oxcf;
1482   cpi->framerate = oxcf->init_framerate;
1483   cm->profile = oxcf->profile;
1484   cm->bit_depth = oxcf->bit_depth;
1485 #if CONFIG_VP9_HIGHBITDEPTH
1486   cm->use_highbitdepth = oxcf->use_highbitdepth;
1487 #endif
1488   cm->color_space = oxcf->color_space;
1489   cm->color_range = oxcf->color_range;
1490
1491   cpi->target_level = oxcf->target_level;
1492   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1493   set_level_constraint(&cpi->level_constraint,
1494                        get_level_index(cpi->target_level));
1495
1496   cm->width = oxcf->width;
1497   cm->height = oxcf->height;
1498   alloc_compressor_data(cpi);
1499
1500   cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
1501
1502   // Single thread case: use counts in common.
1503   cpi->td.counts = &cm->counts;
1504
1505   // Spatial scalability.
1506   cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
1507   // Temporal scalability.
1508   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
1509
1510   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
1511       ((cpi->svc.number_temporal_layers > 1 ||
1512         cpi->svc.number_spatial_layers > 1) &&
1513        cpi->oxcf.pass != 1)) {
1514     vp9_init_layer_context(cpi);
1515   }
1516
1517   // change includes all joint functionality
1518   vp9_change_config(cpi, oxcf);
1519
1520   cpi->static_mb_pct = 0;
1521   cpi->ref_frame_flags = 0;
1522
1523   init_buffer_indices(cpi);
1524
1525   vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
1526 }
1527
1528 void vp9_check_reset_rc_flag(VP9_COMP *cpi) {
1529   RATE_CONTROL *rc = &cpi->rc;
1530
1531   if (cpi->common.current_video_frame >
1532       (unsigned int)cpi->svc.number_spatial_layers) {
1533     if (cpi->use_svc) {
1534       vp9_svc_check_reset_layer_rc_flag(cpi);
1535     } else {
1536       if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
1537           rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
1538         rc->rc_1_frame = 0;
1539         rc->rc_2_frame = 0;
1540         rc->bits_off_target = rc->optimal_buffer_level;
1541         rc->buffer_level = rc->optimal_buffer_level;
1542       }
1543     }
1544   }
1545 }
1546
1547 void vp9_set_rc_buffer_sizes(VP9_COMP *cpi) {
1548   RATE_CONTROL *rc = &cpi->rc;
1549   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1550
1551   const int64_t bandwidth = oxcf->target_bandwidth;
1552   const int64_t starting = oxcf->starting_buffer_level_ms;
1553   const int64_t optimal = oxcf->optimal_buffer_level_ms;
1554   const int64_t maximum = oxcf->maximum_buffer_size_ms;
1555
1556   rc->starting_buffer_level = starting * bandwidth / 1000;
1557   rc->optimal_buffer_level =
1558       (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
1559   rc->maximum_buffer_size =
1560       (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
1561
1562   // Under a configuration change, where maximum_buffer_size may change,
1563   // keep buffer level clipped to the maximum allowed buffer size.
1564   rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
1565   rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
1566 }
1567
1568 #if CONFIG_VP9_HIGHBITDEPTH
// Fills the function pointer table entry cpi->fn_ptr[BSIZE] with the given
// SAD, averaging SAD, variance, sub-pixel variance, sub-pixel averaging
// variance and 4-way SAD functions. sdx8f is always set to NULL.
// The expansion is a plain statement sequence ending in ';' and requires a
// variable named cpi at the point of use.
// TODO(angiebird): make sdx8f available for highbitdepth if needed
#define HIGHBD_BFP(BSIZE, SAD_FN, SAD_AVG_FN, VAR_FN, SUBPEL_VAR_FN, \
                   SUBPEL_AVG_VAR_FN, SAD_X4D_FN)                    \
  cpi->fn_ptr[BSIZE].sdf = SAD_FN;                                   \
  cpi->fn_ptr[BSIZE].sdaf = SAD_AVG_FN;                              \
  cpi->fn_ptr[BSIZE].vf = VAR_FN;                                    \
  cpi->fn_ptr[BSIZE].svf = SUBPEL_VAR_FN;                            \
  cpi->fn_ptr[BSIZE].svaf = SUBPEL_AVG_VAR_FN;                       \
  cpi->fn_ptr[BSIZE].sdx4df = SAD_X4D_FN;                            \
  cpi->fn_ptr[BSIZE].sdx8f = NULL;
1578
// Generates three static wrappers around the SAD function fnname, one per
// bit depth: fnname_bits8 returns the SAD unchanged, fnname_bits10 returns it
// shifted right by 2 and fnname_bits12 returns it shifted right by 4.
#define MAKE_BFP_SAD_WRAPPER(fnname)                                       \
  static unsigned int fnname##_bits8(const uint8_t *src, int src_stride,   \
                                     const uint8_t *ref, int ref_stride) { \
    return fnname(src, src_stride, ref, ref_stride);                       \
  }                                                                        \
  static unsigned int fnname##_bits10(const uint8_t *src, int src_stride,  \
                                      const uint8_t *ref,                  \
                                      int ref_stride) {                    \
    return fnname(src, src_stride, ref, ref_stride) >> 2;                  \
  }                                                                        \
  static unsigned int fnname##_bits12(const uint8_t *src, int src_stride,  \
                                      const uint8_t *ref,                  \
                                      int ref_stride) {                    \
    return fnname(src, src_stride, ref, ref_stride) >> 4;                  \
  }
1595
// Generates three static wrappers around the averaging SAD function fnname
// (which takes an extra second_pred argument), one per bit depth:
// fnname_bits8 returns the SAD unchanged, fnname_bits10 returns it shifted
// right by 2 and fnname_bits12 returns it shifted right by 4.
#define MAKE_BFP_SADAVG_WRAPPER(fnname)                                     \
  static unsigned int fnname##_bits8(const uint8_t *src, int src_stride,    \
                                     const uint8_t *ref, int ref_stride,    \
                                     const uint8_t *pred2) {                \
    return fnname(src, src_stride, ref, ref_stride, pred2);                 \
  }                                                                         \
  static unsigned int fnname##_bits10(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *pred2) {               \
    return fnname(src, src_stride, ref, ref_stride, pred2) >> 2;            \
  }                                                                         \
  static unsigned int fnname##_bits12(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *pred2) {               \
    return fnname(src, src_stride, ref, ref_stride, pred2) >> 4;            \
  }
1614
// Defines three static wrappers around the 4-reference SAD function `fnname`,
// which writes four results into the output array: fnname##_bits8,
// fnname##_bits10 and fnname##_bits12. The 8-bit wrapper leaves the results
// untouched; the 10-bit and 12-bit wrappers shift each of the four results
// right by 2 and 4 bits respectively.
#define MAKE_BFP_SAD4D_WRAPPER(fnname)                                      \
  static void fnname##_bits8(const uint8_t *src, int src_stride,            \
                             const uint8_t *const refs[], int ref_stride,   \
                             unsigned int *sads) {                          \
    fnname(src, src_stride, refs, ref_stride, sads);                        \
  }                                                                         \
  static void fnname##_bits10(const uint8_t *src, int src_stride,           \
                              const uint8_t *const refs[], int ref_stride,  \
                              unsigned int *sads) {                         \
    int k = 0;                                                              \
    fnname(src, src_stride, refs, ref_stride, sads);                        \
    while (k < 4) sads[k++] >>= 2;                                          \
  }                                                                         \
  static void fnname##_bits12(const uint8_t *src, int src_stride,           \
                              const uint8_t *const refs[], int ref_stride,  \
                              unsigned int *sads) {                         \
    int k = 0;                                                              \
    fnname(src, src_stride, refs, ref_stride, sads);                        \
    while (k < 4) sads[k++] >>= 4;                                          \
  }
1635
1636 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
1637 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
1638 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
1639 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
1640 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
1641 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
1642 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
1643 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
1644 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
1645 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
1646 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
1647 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
1648 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
1649 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
1650 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
1651 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
1652 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
1653 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
1654 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
1655 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
1656 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
1657 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
1658 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
1659 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
1660 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
1661 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
1662 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
1663 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
1664 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
1665 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
1666 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
1667 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
1668 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
1669 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
1670 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
1671 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
1672 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
1673 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
1674 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
1675
1676 static void highbd_set_var_fns(VP9_COMP *const cpi) {
1677   VP9_COMMON *const cm = &cpi->common;
1678   if (cm->use_highbitdepth) {
1679     switch (cm->bit_depth) {
1680       case VPX_BITS_8:
1681         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
1682                    vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
1683                    vpx_highbd_8_sub_pixel_variance32x16,
1684                    vpx_highbd_8_sub_pixel_avg_variance32x16,
1685                    vpx_highbd_sad32x16x4d_bits8)
1686
1687         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
1688                    vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
1689                    vpx_highbd_8_sub_pixel_variance16x32,
1690                    vpx_highbd_8_sub_pixel_avg_variance16x32,
1691                    vpx_highbd_sad16x32x4d_bits8)
1692
1693         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
1694                    vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
1695                    vpx_highbd_8_sub_pixel_variance64x32,
1696                    vpx_highbd_8_sub_pixel_avg_variance64x32,
1697                    vpx_highbd_sad64x32x4d_bits8)
1698
1699         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
1700                    vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
1701                    vpx_highbd_8_sub_pixel_variance32x64,
1702                    vpx_highbd_8_sub_pixel_avg_variance32x64,
1703                    vpx_highbd_sad32x64x4d_bits8)
1704
1705         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
1706                    vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
1707                    vpx_highbd_8_sub_pixel_variance32x32,
1708                    vpx_highbd_8_sub_pixel_avg_variance32x32,
1709                    vpx_highbd_sad32x32x4d_bits8)
1710
1711         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
1712                    vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
1713                    vpx_highbd_8_sub_pixel_variance64x64,
1714                    vpx_highbd_8_sub_pixel_avg_variance64x64,
1715                    vpx_highbd_sad64x64x4d_bits8)
1716
1717         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
1718                    vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
1719                    vpx_highbd_8_sub_pixel_variance16x16,
1720                    vpx_highbd_8_sub_pixel_avg_variance16x16,
1721                    vpx_highbd_sad16x16x4d_bits8)
1722
1723         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
1724                    vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
1725                    vpx_highbd_8_sub_pixel_variance16x8,
1726                    vpx_highbd_8_sub_pixel_avg_variance16x8,
1727                    vpx_highbd_sad16x8x4d_bits8)
1728
1729         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
1730                    vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
1731                    vpx_highbd_8_sub_pixel_variance8x16,
1732                    vpx_highbd_8_sub_pixel_avg_variance8x16,
1733                    vpx_highbd_sad8x16x4d_bits8)
1734
1735         HIGHBD_BFP(
1736             BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
1737             vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
1738             vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
1739
1740         HIGHBD_BFP(
1741             BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
1742             vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
1743             vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
1744
1745         HIGHBD_BFP(
1746             BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
1747             vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
1748             vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
1749
1750         HIGHBD_BFP(
1751             BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
1752             vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
1753             vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
1754         break;
1755
1756       case VPX_BITS_10:
1757         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
1758                    vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
1759                    vpx_highbd_10_sub_pixel_variance32x16,
1760                    vpx_highbd_10_sub_pixel_avg_variance32x16,
1761                    vpx_highbd_sad32x16x4d_bits10)
1762
1763         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
1764                    vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
1765                    vpx_highbd_10_sub_pixel_variance16x32,
1766                    vpx_highbd_10_sub_pixel_avg_variance16x32,
1767                    vpx_highbd_sad16x32x4d_bits10)
1768
1769         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
1770                    vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
1771                    vpx_highbd_10_sub_pixel_variance64x32,
1772                    vpx_highbd_10_sub_pixel_avg_variance64x32,
1773                    vpx_highbd_sad64x32x4d_bits10)
1774
1775         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
1776                    vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
1777                    vpx_highbd_10_sub_pixel_variance32x64,
1778                    vpx_highbd_10_sub_pixel_avg_variance32x64,
1779                    vpx_highbd_sad32x64x4d_bits10)
1780
1781         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
1782                    vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
1783                    vpx_highbd_10_sub_pixel_variance32x32,
1784                    vpx_highbd_10_sub_pixel_avg_variance32x32,
1785                    vpx_highbd_sad32x32x4d_bits10)
1786
1787         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
1788                    vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
1789                    vpx_highbd_10_sub_pixel_variance64x64,
1790                    vpx_highbd_10_sub_pixel_avg_variance64x64,
1791                    vpx_highbd_sad64x64x4d_bits10)
1792
1793         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
1794                    vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
1795                    vpx_highbd_10_sub_pixel_variance16x16,
1796                    vpx_highbd_10_sub_pixel_avg_variance16x16,
1797                    vpx_highbd_sad16x16x4d_bits10)
1798
1799         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
1800                    vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
1801                    vpx_highbd_10_sub_pixel_variance16x8,
1802                    vpx_highbd_10_sub_pixel_avg_variance16x8,
1803                    vpx_highbd_sad16x8x4d_bits10)
1804
1805         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
1806                    vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
1807                    vpx_highbd_10_sub_pixel_variance8x16,
1808                    vpx_highbd_10_sub_pixel_avg_variance8x16,
1809                    vpx_highbd_sad8x16x4d_bits10)
1810
1811         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
1812                    vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
1813                    vpx_highbd_10_sub_pixel_variance8x8,
1814                    vpx_highbd_10_sub_pixel_avg_variance8x8,
1815                    vpx_highbd_sad8x8x4d_bits10)
1816
1817         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
1818                    vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
1819                    vpx_highbd_10_sub_pixel_variance8x4,
1820                    vpx_highbd_10_sub_pixel_avg_variance8x4,
1821                    vpx_highbd_sad8x4x4d_bits10)
1822
1823         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
1824                    vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
1825                    vpx_highbd_10_sub_pixel_variance4x8,
1826                    vpx_highbd_10_sub_pixel_avg_variance4x8,
1827                    vpx_highbd_sad4x8x4d_bits10)
1828
1829         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
1830                    vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
1831                    vpx_highbd_10_sub_pixel_variance4x4,
1832                    vpx_highbd_10_sub_pixel_avg_variance4x4,
1833                    vpx_highbd_sad4x4x4d_bits10)
1834         break;
1835
1836       default:
1837         assert(cm->bit_depth == VPX_BITS_12);
1838         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
1839                    vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
1840                    vpx_highbd_12_sub_pixel_variance32x16,
1841                    vpx_highbd_12_sub_pixel_avg_variance32x16,
1842                    vpx_highbd_sad32x16x4d_bits12)
1843
1844         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
1845                    vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
1846                    vpx_highbd_12_sub_pixel_variance16x32,
1847                    vpx_highbd_12_sub_pixel_avg_variance16x32,
1848                    vpx_highbd_sad16x32x4d_bits12)
1849
1850         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
1851                    vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
1852                    vpx_highbd_12_sub_pixel_variance64x32,
1853                    vpx_highbd_12_sub_pixel_avg_variance64x32,
1854                    vpx_highbd_sad64x32x4d_bits12)
1855
1856         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
1857                    vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
1858                    vpx_highbd_12_sub_pixel_variance32x64,
1859                    vpx_highbd_12_sub_pixel_avg_variance32x64,
1860                    vpx_highbd_sad32x64x4d_bits12)
1861
1862         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
1863                    vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
1864                    vpx_highbd_12_sub_pixel_variance32x32,
1865                    vpx_highbd_12_sub_pixel_avg_variance32x32,
1866                    vpx_highbd_sad32x32x4d_bits12)
1867
1868         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
1869                    vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
1870                    vpx_highbd_12_sub_pixel_variance64x64,
1871                    vpx_highbd_12_sub_pixel_avg_variance64x64,
1872                    vpx_highbd_sad64x64x4d_bits12)
1873
1874         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
1875                    vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
1876                    vpx_highbd_12_sub_pixel_variance16x16,
1877                    vpx_highbd_12_sub_pixel_avg_variance16x16,
1878                    vpx_highbd_sad16x16x4d_bits12)
1879
1880         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
1881                    vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
1882                    vpx_highbd_12_sub_pixel_variance16x8,
1883                    vpx_highbd_12_sub_pixel_avg_variance16x8,
1884                    vpx_highbd_sad16x8x4d_bits12)
1885
1886         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
1887                    vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
1888                    vpx_highbd_12_sub_pixel_variance8x16,
1889                    vpx_highbd_12_sub_pixel_avg_variance8x16,
1890                    vpx_highbd_sad8x16x4d_bits12)
1891
1892         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
1893                    vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
1894                    vpx_highbd_12_sub_pixel_variance8x8,
1895                    vpx_highbd_12_sub_pixel_avg_variance8x8,
1896                    vpx_highbd_sad8x8x4d_bits12)
1897
1898         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
1899                    vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
1900                    vpx_highbd_12_sub_pixel_variance8x4,
1901                    vpx_highbd_12_sub_pixel_avg_variance8x4,
1902                    vpx_highbd_sad8x4x4d_bits12)
1903
1904         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
1905                    vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
1906                    vpx_highbd_12_sub_pixel_variance4x8,
1907                    vpx_highbd_12_sub_pixel_avg_variance4x8,
1908                    vpx_highbd_sad4x8x4d_bits12)
1909
1910         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
1911                    vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
1912                    vpx_highbd_12_sub_pixel_variance4x4,
1913                    vpx_highbd_12_sub_pixel_avg_variance4x4,
1914                    vpx_highbd_sad4x4x4d_bits12)
1915         break;
1916     }
1917   }
1918 }
1919 #endif  // CONFIG_VP9_HIGHBITDEPTH
1920
1921 static void realloc_segmentation_maps(VP9_COMP *cpi) {
1922   VP9_COMMON *const cm = &cpi->common;
1923
1924   // Create the encoder segmentation map and set all entries to 0
1925   vpx_free(cpi->segmentation_map);
1926   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
1927                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1928
1929   // Create a map used for cyclic background refresh.
1930   if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1931   CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
1932                   vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
1933
1934   // Create a map used to mark inactive areas.
1935   vpx_free(cpi->active_map.map);
1936   CHECK_MEM_ERROR(cm, cpi->active_map.map,
1937                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1938
1939   // And a place holder structure is the coding context
1940   // for use if we want to save and restore it
1941   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
1942   CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
1943                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1944 }
1945
1946 static void alloc_copy_partition_data(VP9_COMP *cpi) {
1947   VP9_COMMON *const cm = &cpi->common;
1948   if (cpi->prev_partition == NULL) {
1949     CHECK_MEM_ERROR(cm, cpi->prev_partition,
1950                     (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
1951                                              sizeof(*cpi->prev_partition)));
1952   }
1953   if (cpi->prev_segment_id == NULL) {
1954     CHECK_MEM_ERROR(
1955         cm, cpi->prev_segment_id,
1956         (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1957                              sizeof(*cpi->prev_segment_id)));
1958   }
1959   if (cpi->prev_variance_low == NULL) {
1960     CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
1961                     (uint8_t *)vpx_calloc(
1962                         (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
1963                         sizeof(*cpi->prev_variance_low)));
1964   }
1965   if (cpi->copied_frame_cnt == NULL) {
1966     CHECK_MEM_ERROR(
1967         cm, cpi->copied_frame_cnt,
1968         (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1969                               sizeof(*cpi->copied_frame_cnt)));
1970   }
1971 }
1972
1973 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1974   VP9_COMMON *const cm = &cpi->common;
1975   RATE_CONTROL *const rc = &cpi->rc;
1976   int last_w = cpi->oxcf.width;
1977   int last_h = cpi->oxcf.height;
1978
1979   vp9_init_quantizer(cpi);
1980   if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
1981   cm->bit_depth = oxcf->bit_depth;
1982   cm->color_space = oxcf->color_space;
1983   cm->color_range = oxcf->color_range;
1984
1985   cpi->target_level = oxcf->target_level;
1986   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1987   set_level_constraint(&cpi->level_constraint,
1988                        get_level_index(cpi->target_level));
1989
1990   if (cm->profile <= PROFILE_1)
1991     assert(cm->bit_depth == VPX_BITS_8);
1992   else
1993     assert(cm->bit_depth > VPX_BITS_8);
1994
1995   cpi->oxcf = *oxcf;
1996 #if CONFIG_VP9_HIGHBITDEPTH
1997   cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
1998 #endif  // CONFIG_VP9_HIGHBITDEPTH
1999
2000   if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
2001     rc->baseline_gf_interval = FIXED_GF_INTERVAL;
2002   } else {
2003     rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
2004   }
2005
2006   cpi->refresh_golden_frame = 0;
2007   cpi->refresh_last_frame = 1;
2008   cm->refresh_frame_context = 1;
2009   cm->reset_frame_context = 0;
2010
2011   vp9_reset_segment_features(&cm->seg);
2012   vp9_set_high_precision_mv(cpi, 0);
2013
2014   {
2015     int i;
2016
2017     for (i = 0; i < MAX_SEGMENTS; i++)
2018       cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
2019   }
2020   cpi->encode_breakout = cpi->oxcf.encode_breakout;
2021
2022   vp9_set_rc_buffer_sizes(cpi);
2023
2024   // Set up frame rate and related parameters rate control values.
2025   vp9_new_framerate(cpi, cpi->framerate);
2026
2027   // Set absolute upper and lower quality limits
2028   rc->worst_quality = cpi->oxcf.worst_allowed_q;
2029   rc->best_quality = cpi->oxcf.best_allowed_q;
2030
2031   cm->interp_filter = cpi->sf.default_interp_filter;
2032
2033   if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
2034     cm->render_width = cpi->oxcf.render_width;
2035     cm->render_height = cpi->oxcf.render_height;
2036   } else {
2037     cm->render_width = cpi->oxcf.width;
2038     cm->render_height = cpi->oxcf.height;
2039   }
2040   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2041     cm->width = cpi->oxcf.width;
2042     cm->height = cpi->oxcf.height;
2043     cpi->external_resize = 1;
2044   }
2045
2046   if (cpi->initial_width) {
2047     int new_mi_size = 0;
2048     vp9_set_mb_mi(cm, cm->width, cm->height);
2049     new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
2050     if (cm->mi_alloc_size < new_mi_size) {
2051       vp9_free_context_buffers(cm);
2052       alloc_compressor_data(cpi);
2053       realloc_segmentation_maps(cpi);
2054       cpi->initial_width = cpi->initial_height = 0;
2055       cpi->external_resize = 0;
2056     } else if (cm->mi_alloc_size == new_mi_size &&
2057                (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
2058       vp9_alloc_loop_filter(cm);
2059     }
2060   }
2061
2062   if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width ||
2063       last_h != cpi->oxcf.height)
2064     update_frame_size(cpi);
2065
2066   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2067     memset(cpi->consec_zero_mv, 0,
2068            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
2069     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
2070       vp9_cyclic_refresh_reset_resize(cpi);
2071     rc->rc_1_frame = 0;
2072     rc->rc_2_frame = 0;
2073   }
2074
2075   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
2076       ((cpi->svc.number_temporal_layers > 1 ||
2077         cpi->svc.number_spatial_layers > 1) &&
2078        cpi->oxcf.pass != 1)) {
2079     vp9_update_layer_context_change_config(cpi,
2080                                            (int)cpi->oxcf.target_bandwidth);
2081   }
2082
2083   vp9_check_reset_rc_flag(cpi);
2084
2085   cpi->alt_ref_source = NULL;
2086   rc->is_src_frame_alt_ref = 0;
2087
2088 #if 0
2089   // Experimental RD Code
2090   cpi->frame_distortion = 0;
2091   cpi->last_frame_distortion = 0;
2092 #endif
2093
2094   set_tile_limits(cpi);
2095
2096   cpi->ext_refresh_frame_flags_pending = 0;
2097   cpi->ext_refresh_frame_context_pending = 0;
2098
2099 #if CONFIG_VP9_HIGHBITDEPTH
2100   highbd_set_var_fns(cpi);
2101 #endif
2102
2103   vp9_set_row_mt(cpi);
2104 }
2105
// Despite its name, M_LOG2_E holds the natural logarithm of 2, so dividing
// log(x) by it yields the base-2 logarithm of x. The log2f macro below is
// defined in terms of log() and this constant.
#if !defined(M_LOG2_E)
#define M_LOG2_E 0.693147180559945309417
#endif
#define log2f(x) (log(x) / (float)M_LOG2_E)
2110
2111 /***********************************************************************
2112  * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts'    *
2113  ***********************************************************************
2114  * The following 2 functions ('cal_nmvjointsadcost' and                *
2115  * 'cal_nmvsadcosts') are used to calculate cost lookup tables         *
2116  * used by 'vp9_diamond_search_sad'. The C implementation of the       *
2117  * function is generic, but the AVX intrinsics optimised version       *
2118  * relies on the following properties of the computed tables:          *
2119  * For cal_nmvjointsadcost:                                            *
2120  *   - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]     *
2121  * For cal_nmvsadcosts:                                                *
2122  *   - For all i: mvsadcost[0][i] == mvsadcost[1][i]                   *
2123  *         (Equal costs for both components)                           *
2124  *   - For all i: mvsadcost[0][i] == mvsadcost[0][-i]                  *
2125  *         (Cost function is even)                                     *
2126  * If these do not hold, then the AVX optimised version of the         *
2127  * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
2128  * case you can revert to using the C function instead.                *
2129  ***********************************************************************/
2130
// Writes the four motion-vector joint SAD costs into mvjointsadcost[0..3]:
// 600 for index 0 and 300 for indices 1, 2 and 3.
static void cal_nmvjointsadcost(int *mvjointsadcost) {
  /*********************************************************************
   * Warning: Read the comments above before modifying this function   *
   *********************************************************************/
  static const int kJointSadCost[4] = { 600, 300, 300, 300 };
  int joint;

  for (joint = 0; joint < 4; ++joint) {
    mvjointsadcost[joint] = kJointSadCost[joint];
  }
}
2140
2141 static void cal_nmvsadcosts(int *mvsadcost[2]) {
2142   /*********************************************************************
2143    * Warning: Read the comments above before modifying this function   *
2144    *********************************************************************/
2145   int i = 1;
2146
2147   mvsadcost[0][0] = 0;
2148   mvsadcost[1][0] = 0;
2149
2150   do {
2151     double z = 256 * (2 * (log2f(8 * i) + .6));
2152     mvsadcost[0][i] = (int)z;
2153     mvsadcost[1][i] = (int)z;
2154     mvsadcost[0][-i] = (int)z;
2155     mvsadcost[1][-i] = (int)z;
2156   } while (++i <= MV_MAX);
2157 }
2158
2159 static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
2160   int i = 1;
2161
2162   mvsadcost[0][0] = 0;
2163   mvsadcost[1][0] = 0;
2164
2165   do {
2166     double z = 256 * (2 * (log2f(8 * i) + .6));
2167     mvsadcost[0][i] = (int)z;
2168     mvsadcost[1][i] = (int)z;
2169     mvsadcost[0][-i] = (int)z;
2170     mvsadcost[1][-i] = (int)z;
2171   } while (++i <= MV_MAX);
2172 }
2173
2174 static void init_ref_frame_bufs(VP9_COMMON *cm) {
2175   int i;
2176   BufferPool *const pool = cm->buffer_pool;
2177   cm->new_fb_idx = INVALID_IDX;
2178   for (i = 0; i < REF_FRAMES; ++i) {
2179     cm->ref_frame_map[i] = INVALID_IDX;
2180   }
2181   for (i = 0; i < FRAME_BUFFERS; ++i) {
2182     pool->frame_bufs[i].ref_count = 0;
2183   }
2184 }
2185
2186 static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
2187                                  int subsampling_x, int subsampling_y) {
2188   VP9_COMMON *const cm = &cpi->common;
2189 #if !CONFIG_VP9_HIGHBITDEPTH
2190   (void)use_highbitdepth;
2191   assert(use_highbitdepth == 0);
2192 #endif
2193
2194   if (!cpi->initial_width ||
2195 #if CONFIG_VP9_HIGHBITDEPTH
2196       cm->use_highbitdepth != use_highbitdepth ||
2197 #endif
2198       cm->subsampling_x != subsampling_x ||
2199       cm->subsampling_y != subsampling_y) {
2200     cm->subsampling_x = subsampling_x;
2201     cm->subsampling_y = subsampling_y;
2202 #if CONFIG_VP9_HIGHBITDEPTH
2203     cm->use_highbitdepth = use_highbitdepth;
2204 #endif
2205     alloc_util_frame_buffers(cpi);
2206     cpi->initial_width = cm->width;
2207     cpi->initial_height = cm->height;
2208     cpi->initial_mbs = cm->MBs;
2209   }
2210 }
2211
2212 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2213 static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
2214                                               unsigned int *subsampling_x,
2215                                               unsigned int *subsampling_y) {
2216   switch (fmt) {
2217     case VPX_IMG_FMT_I420:
2218     case VPX_IMG_FMT_YV12:
2219     case VPX_IMG_FMT_I422:
2220     case VPX_IMG_FMT_I42016:
2221     case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
2222     default: *subsampling_x = 0; break;
2223   }
2224
2225   switch (fmt) {
2226     case VPX_IMG_FMT_I420:
2227     case VPX_IMG_FMT_I440:
2228     case VPX_IMG_FMT_YV12:
2229     case VPX_IMG_FMT_I42016:
2230     case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
2231     default: *subsampling_y = 0; break;
2232   }
2233 }
2234
2235 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2236 static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
2237   return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
2238 }
2239
#if CONFIG_VP9_TEMPORAL_DENOISING
// Allocates the temporal denoiser buffers when noise sensitivity is enabled
// in the configuration and the denoiser frame buffers have not been
// initialized yet. An allocation failure is reported through
// vpx_internal_error with VPX_CODEC_MEM_ERROR.
static void setup_denoiser_buffer(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  int alloc_failed;

  if (cpi->oxcf.noise_sensitivity <= 0) return;
  if (cpi->denoiser.frame_buffer_initialized) return;

  alloc_failed =
      vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
                         cpi->oxcf.noise_sensitivity, cm->width, cm->height,
                         cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                         cm->use_highbitdepth,
#endif
                         VP9_ENC_BORDER_IN_PIXELS);
  if (alloc_failed) {
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate denoiser");
  }
}
#endif
2257
2258 void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
2259   const VP9EncoderConfig *oxcf = &cpi->oxcf;
2260   unsigned int subsampling_x, subsampling_y;
2261   const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
2262   vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
2263
2264   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
2265 #if CONFIG_VP9_TEMPORAL_DENOISING
2266   setup_denoiser_buffer(cpi);
2267 #endif
2268
2269   assert(cpi->lookahead == NULL);
2270   cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
2271                                       subsampling_y,
2272 #if CONFIG_VP9_HIGHBITDEPTH
2273                                       use_highbitdepth,
2274 #endif
2275                                       oxcf->lag_in_frames);
2276   alloc_raw_frame_buffers(cpi);
2277 }
2278
2279 VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
2280                                 BufferPool *const pool) {
       // Allocates a 32-byte aligned, zeroed VP9_COMP, configures it from |oxcf|
       // and attaches |pool| as the common buffer pool. Returns the new encoder,
       // or NULL when the initial allocation fails. If the setjmp() below is
       // re-entered through cm->error.jmp, the partially built encoder is
       // released with vp9_remove_compressor() and 0 is returned.
2281   unsigned int i;
2282   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
2283   VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
2284
2285   if (!cm) return NULL;
2286
2287   vp9_zero(*cpi);
2288
       // Error landing pad for everything up to "cm->error.setjmp = 0" below.
       // Presumably CHECK_MEM_ERROR / vpx_internal_error jump back here on
       // failure -- confirm against their definitions.
2289   if (setjmp(cm->error.jmp)) {
2290     cm->error.setjmp = 0;
2291     vp9_remove_compressor(cpi);
2292     return 0;
2293   }
2294
2295   cm->error.setjmp = 1;
2296   cm->alloc_mi = vp9_enc_alloc_mi;
2297   cm->free_mi = vp9_enc_free_mi;
2298   cm->setup_mi = vp9_enc_setup_mi;
2299
2300   CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
2301   CHECK_MEM_ERROR(
2302       cm, cm->frame_contexts,
2303       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
2304
2305   cpi->use_svc = 0;
2306   cpi->resize_state = ORIG;
2307   cpi->external_resize = 0;
2308   cpi->resize_avg_qp = 0;
2309   cpi->resize_buffer_underflow = 0;
2310   cpi->use_skin_detection = 0;
2311   cpi->common.buffer_pool = pool;
2312   init_ref_frame_bufs(cm);
2313
2314   cpi->force_update_segmentation = 0;
2315
2316   init_config(cpi, oxcf);
2317   cpi->frame_info = vp9_get_frame_info(oxcf);
2318
2319   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
2320
2321   init_frame_indexes(cm);
2322   cpi->partition_search_skippable_frame = 0;
2323   cpi->tile_data = NULL;
2324
2325   realloc_segmentation_maps(cpi);
2326
       // Per-mode-info-unit maps, sized mi_rows * mi_cols.
2327   CHECK_MEM_ERROR(
2328       cm, cpi->skin_map,
2329       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
2330
2331 #if !CONFIG_REALTIME_ONLY
2332   CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
2333 #endif
2334
2335   CHECK_MEM_ERROR(
2336       cm, cpi->consec_zero_mv,
2337       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
2338
       // Motion vector cost tables, MV_VALS entries each. The encoder's working
       // pointers are later set to element MV_MAX of each table (see below).
2339   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
2340                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
2341   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
2342                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
2343   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
2344                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
2345   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
2346                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
2347   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
2348                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
2349   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
2350                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
2351   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
2352                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
2353   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
2354                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
2355
2356   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
2357        i++) {
2358     CHECK_MEM_ERROR(
2359         cm, cpi->mbgraph_stats[i].mb_stats,
2360         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
2361   }
2362
2363 #if CONFIG_FP_MB_STATS
       // use_fp_mb_stats is cleared on the next line, so the allocating branch
       // below is not taken in this function as written.
2364   cpi->use_fp_mb_stats = 0;
2365   if (cpi->use_fp_mb_stats) {
2366     // a place holder used to store the first pass mb stats in the first pass
2367     CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
2368                     vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
2369   } else {
2370     cpi->twopass.frame_mb_stats_buf = NULL;
2371   }
2372 #endif
2373
2374   cpi->refresh_alt_ref_frame = 0;
2375   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
2376
2377   init_level_info(&cpi->level_info);
2378   init_level_constraint(&cpi->level_constraint);
2379
2380 #if CONFIG_INTERNAL_STATS
       // Reset the accumulators that vp9_remove_compressor() reports in
       // "opsnr.stt".
2381   cpi->b_calculate_blockiness = 1;
2382   cpi->b_calculate_consistency = 1;
2383   cpi->total_inconsistency = 0;
2384   cpi->psnr.worst = 100.0;
2385   cpi->worst_ssim = 100.0;
2386
2387   cpi->count = 0;
2388   cpi->bytes = 0;
2389
2390   if (cpi->b_calculate_psnr) {
2391     cpi->total_sq_error = 0;
2392     cpi->total_samples = 0;
2393
2394     cpi->totalp_sq_error = 0;
2395     cpi->totalp_samples = 0;
2396
2397     cpi->tot_recode_hits = 0;
2398     cpi->summed_quality = 0;
2399     cpi->summed_weights = 0;
2400     cpi->summedp_quality = 0;
2401     cpi->summedp_weights = 0;
2402   }
2403
2404   cpi->fastssim.worst = 100.0;
2405
2406   cpi->psnrhvs.worst = 100.0;
2407
2408   if (cpi->b_calculate_blockiness) {
2409     cpi->total_blockiness = 0;
2410     cpi->worst_blockiness = 0.0;
2411   }
2412
2413   if (cpi->b_calculate_consistency) {
2414     CHECK_MEM_ERROR(cm, cpi->ssim_vars,
2415                     vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
2416                                sizeof(*cpi->ssim_vars) * 4));
2417     cpi->worst_consistency = 100.0;
2418   } else {
2419     cpi->ssim_vars = NULL;
2420   }
2421
2422 #endif
2423
2424   cpi->first_time_stamp_ever = INT64_MAX;
2425
2426   /*********************************************************************
2427    * Warning: Read the comments around 'cal_nmvjointsadcost' and       *
2428    * 'cal_nmvsadcosts' before modifying how these tables are computed. *
2429    *********************************************************************/
2430   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
       // The working cost pointers are offset by MV_MAX into their tables.
2431   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
2432   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
2433   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
2434   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
2435   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
2436
2437   cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
2438   cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
2439   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
2440   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
2441   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
2442
       // Optional debug dumps, enabled by compile-time macros. The fopen()
       // results are not checked here.
2443 #if CONFIG_VP9_TEMPORAL_DENOISING
2444 #ifdef OUTPUT_YUV_DENOISED
2445   yuv_denoised_file = fopen("denoised.yuv", "ab");
2446 #endif
2447 #endif
2448 #ifdef OUTPUT_YUV_SKINMAP
2449   yuv_skinmap_file = fopen("skinmap.yuv", "wb");
2450 #endif
2451 #ifdef OUTPUT_YUV_REC
2452   yuv_rec_file = fopen("rec.yuv", "wb");
2453 #endif
2454 #ifdef OUTPUT_YUV_SVC_SRC
2455   yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
2456   yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
2457   yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
2458 #endif
2459
2460 #if 0
2461   framepsnr = fopen("framepsnr.stt", "a");
2462   kf_list = fopen("kf_list.stt", "w");
2463 #endif
2464
2465   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
2466
2467   vp9_extrc_init(&cpi->ext_ratectrl);
2468
2469 #if !CONFIG_REALTIME_ONLY
       // Pass 1 starts first-pass stats collection; pass 2 wires up the
       // first-pass stats supplied in oxcf->two_pass_stats_in.
2470   if (oxcf->pass == 1) {
2471     vp9_init_first_pass(cpi);
2472   } else if (oxcf->pass == 2) {
2473     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
2474     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
2475
2476     if (cpi->svc.number_spatial_layers > 1 ||
2477         cpi->svc.number_temporal_layers > 1) {
2478       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
2479       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 };
           // NOTE(review): this 'int i' shadows the function-scope
           // 'unsigned int i' declared at the top.
2480       int i;
2481
           // The last ss_number_layers packets are read as the per-layer
           // summary packets; each one gives the layer id and packet count used
           // to size that layer's private copy of the stats.
           // NOTE(review): 'packets - oxcf->ss_number_layers + i' is not
           // checked against 0 here -- confirm the stats buffer always holds at
           // least ss_number_layers packets.
2482       for (i = 0; i < oxcf->ss_number_layers; ++i) {
2483         FIRSTPASS_STATS *const last_packet_for_layer =
2484             &stats[packets - oxcf->ss_number_layers + i];
2485         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
2486         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
2487         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
2488           int num_frames;
2489           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
2490
2491           vpx_free(lc->rc_twopass_stats_in.buf);
2492
2493           lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
2494           CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
2495                           vpx_malloc(lc->rc_twopass_stats_in.sz));
2496           lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
2497           lc->twopass.stats_in = lc->twopass.stats_in_start;
2498           lc->twopass.stats_in_end =
2499               lc->twopass.stats_in_start + packets_in_layer - 1;
2500           // Note the last packet is cumulative first pass stats.
2501           // So the number of frames is packet number minus one
2502           num_frames = packets_in_layer - 1;
2503           fps_init_first_pass_info(&lc->twopass.first_pass_info,
2504                                    lc->rc_twopass_stats_in.buf, num_frames);
2505           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
2506         }
2507       }
2508
           // De-interleave: append every packet to the buffer of the layer it
           // names, advancing that layer's write cursor.
           // NOTE(review): the number of packets copied per layer is not
           // bounded by packets_in_layer in this loop -- confirm the input
           // cannot contain more packets for a layer than its summary count.
2509       for (i = 0; i < packets; ++i) {
2510         const int layer_id = (int)stats[i].spatial_layer_id;
2511         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers &&
2512             stats_copy[layer_id] != NULL) {
2513           *stats_copy[layer_id] = stats[i];
2514           ++stats_copy[layer_id];
2515         }
2516       }
2517
2518       vp9_init_second_pass_spatial_svc(cpi);
2519     } else {
2520       int num_frames;
2521 #if CONFIG_FP_MB_STATS
2522       if (cpi->use_fp_mb_stats) {
2523         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
2524         const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
2525
2526         cpi->twopass.firstpass_mb_stats.mb_stats_start =
2527             oxcf->firstpass_mb_stats_in.buf;
2528         cpi->twopass.firstpass_mb_stats.mb_stats_end =
2529             cpi->twopass.firstpass_mb_stats.mb_stats_start +
2530             (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
2531       }
2532 #endif
2533
           // Single-layer case: the stats are used in place from the caller's
           // buffer rather than copied.
           // NOTE(review): with packets == 0 the end pointer below indexes
           // element -1 and num_frames becomes -1 -- confirm callers never pass
           // an empty stats buffer in pass 2.
2534       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
2535       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
2536       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
2537       // Note the last packet is cumulative first pass stats.
2538       // So the number of frames is packet number minus one
2539       num_frames = packets - 1;
2540       fps_init_first_pass_info(&cpi->twopass.first_pass_info,
2541                                oxcf->two_pass_stats_in.buf, num_frames);
2542
2543       vp9_init_second_pass(cpi);
2544     }
2545   }
2546 #endif  // !CONFIG_REALTIME_ONLY
2547
2548   cpi->mb_wiener_var_cols = 0;
2549   cpi->mb_wiener_var_rows = 0;
2550   cpi->mb_wiener_variance = NULL;
2551
2552   vp9_set_speed_features_framesize_independent(cpi, oxcf->speed);
2553   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
2554
       // One SSIM rdmult scaling factor per 16x16 block; the counts are rounded
       // up so partial blocks at the frame edge get an entry.
2555   {
2556     const int bsize = BLOCK_16X16;
2557     const int w = num_8x8_blocks_wide_lookup[bsize];
2558     const int h = num_8x8_blocks_high_lookup[bsize];
2559     const int num_cols = (cm->mi_cols + w - 1) / w;
2560     const int num_rows = (cm->mi_rows + h - 1) / h;
2561     CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
2562                     vpx_calloc(num_rows * num_cols,
2563                                sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
2564   }
2565
2566   cpi->kmeans_data_arr_alloc = 0;
2567 #if CONFIG_NON_GREEDY_MV
2568   cpi->tpl_ready = 0;
2569 #endif  // CONFIG_NON_GREEDY_MV
2570   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
2571
2572   // Allocate memory to store variances for a frame.
2573   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
2574   cpi->source_var_thresh = 0;
2575   cpi->frames_till_next_var_check = 0;
       // BFP fills one cpi->fn_ptr[] entry (SAD, averaging SAD, variance,
       // sub-pixel variance and multi-candidate SAD functions) for block size BT.
2576 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
2577   cpi->fn_ptr[BT].sdf = SDF;                             \
2578   cpi->fn_ptr[BT].sdaf = SDAF;                           \
2579   cpi->fn_ptr[BT].vf = VF;                               \
2580   cpi->fn_ptr[BT].svf = SVF;                             \
2581   cpi->fn_ptr[BT].svaf = SVAF;                           \
2582   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
2583   cpi->fn_ptr[BT].sdx8f = SDX8F;
2584
2585   // TODO(angiebird): make sdx8f available for every block size
2586   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
2587       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
2588       vpx_sad32x16x4d, NULL)
2589
2590   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
2591       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
2592       vpx_sad16x32x4d, NULL)
2593
2594   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
2595       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
2596       vpx_sad64x32x4d, NULL)
2597
2598   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
2599       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
2600       vpx_sad32x64x4d, NULL)
2601
2602   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
2603       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
2604       vpx_sad32x32x4d, vpx_sad32x32x8)
2605
2606   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
2607       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
2608       vpx_sad64x64x4d, NULL)
2609
2610   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
2611       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
2612       vpx_sad16x16x4d, vpx_sad16x16x8)
2613
2614   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
2615       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
2616       vpx_sad16x8x4d, vpx_sad16x8x8)
2617
2618   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
2619       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
2620       vpx_sad8x16x4d, vpx_sad8x16x8)
2621
2622   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
2623       vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
2624       vpx_sad8x8x8)
2625
2626   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
2627       vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
2628       NULL)
2629
2630   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
2631       vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
2632       NULL)
2633
2634   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
2635       vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
2636       vpx_sad4x4x8)
2637
2638 #if CONFIG_VP9_HIGHBITDEPTH
2639   highbd_set_var_fns(cpi);
2640 #endif
2641
2642   /* vp9_init_quantizer() is first called here. Add check in
2643    * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
2644    * called later when needed. This will avoid unnecessary calls of
2645    * vp9_init_quantizer() for every frame.
2646    */
2647   vp9_init_quantizer(cpi);
2648
2649   vp9_loop_filter_init(cm);
2650
2651   // Set up the unit scaling factor used during motion search.
2652 #if CONFIG_VP9_HIGHBITDEPTH
2653   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2654                                     cm->width, cm->height,
2655                                     cm->use_highbitdepth);
2656 #else
2657   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2658                                     cm->width, cm->height);
2659 #endif  // CONFIG_VP9_HIGHBITDEPTH
2660   cpi->td.mb.me_sf = &cpi->me_sf;
2661
       // Disarm the error landing pad: from here on the setjmp flag is clear.
2662   cm->error.setjmp = 0;
2663
2664 #if CONFIG_RATE_CTRL
2665   encode_command_init(&cpi->encode_command);
2666   partition_info_init(cpi);
2667   motion_vector_info_init(cpi);
2668   fp_motion_vector_info_init(cpi);
2669   tpl_stats_info_init(cpi);
2670 #endif
2671
2672   return cpi;
2673 }
2674
2675 #if CONFIG_INTERNAL_STATS
// Appends TEXT (used as a printf format with no arguments) to the char array
// BUF. BUF must be a real array, since the remaining room is computed from
// sizeof(BUF).
#define SNPRINT(BUF, TEXT) \
  snprintf(&(BUF)[strlen(BUF)], sizeof(BUF) - strlen(BUF), (TEXT))

// Same as SNPRINT, but FMT takes exactly one argument, VAL.
#define SNPRINT2(BUF, FMT, VAL) \
  snprintf(&(BUF)[strlen(BUF)], sizeof(BUF) - strlen(BUF), (FMT), (VAL))
2680 #endif  // CONFIG_INTERNAL_STATS
2681
2682 static void free_tpl_buffer(VP9_COMP *cpi);
2683
2684 void vp9_remove_compressor(VP9_COMP *cpi) {
2685   VP9_COMMON *cm;
2686   unsigned int i;
2687   int t;
2688
2689   if (!cpi) return;
2690
2691 #if CONFIG_INTERNAL_STATS
2692   vpx_free(cpi->ssim_vars);
2693 #endif
2694
2695   cm = &cpi->common;
2696   if (cm->current_video_frame > 0) {
2697 #if CONFIG_INTERNAL_STATS
2698     vpx_clear_system_state();
2699
2700     if (cpi->oxcf.pass != 1) {
2701       char headings[512] = { 0 };
2702       char results[512] = { 0 };
2703       FILE *f = fopen("opsnr.stt", "a");
2704       double time_encoded =
2705           (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
2706           10000000.000;
2707       double total_encode_time =
2708           (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
2709       const double dr =
2710           (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
2711       const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
2712       const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
2713       const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
2714
2715       if (cpi->b_calculate_psnr) {
2716         const double total_psnr = vpx_sse_to_psnr(
2717             (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
2718         const double totalp_psnr = vpx_sse_to_psnr(
2719             (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error);
2720         const double total_ssim =
2721             100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
2722         const double totalp_ssim =
2723             100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0);
2724
2725         snprintf(headings, sizeof(headings),
2726                  "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
2727                  "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
2728                  "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
2729                  "AVPsnrY\tAPsnrCb\tAPsnrCr");
2730         snprintf(results, sizeof(results),
2731                  "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2732                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2733                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2734                  "%7.3f\t%7.3f\t%7.3f",
2735                  dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
2736                  cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim,
2737                  totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count,
2738                  cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
2739                  cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
2740                  cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count,
2741                  cpi->psnr.stat[V] / cpi->count);
2742
2743         if (cpi->b_calculate_blockiness) {
2744           SNPRINT(headings, "\t  Block\tWstBlck");
2745           SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
2746           SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
2747         }
2748
2749         if (cpi->b_calculate_consistency) {
2750           double consistency =
2751               vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
2752                               (double)cpi->total_inconsistency);
2753
2754           SNPRINT(headings, "\tConsist\tWstCons");
2755           SNPRINT2(results, "\t%7.3f", consistency);
2756           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
2757         }
2758
2759         SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
2760         SNPRINT2(results, "\t%8.0f", total_encode_time);
2761         SNPRINT2(results, "\t%7.2f", rate_err);
2762         SNPRINT2(results, "\t%7.2f", fabs(rate_err));
2763
2764         fprintf(f, "%s\tAPsnr611\n", headings);
2765         fprintf(
2766             f, "%s\t%7.3f\n", results,
2767             (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
2768                 (cpi->count * 8));
2769       }
2770
2771       fclose(f);
2772     }
2773 #endif
2774
2775 #if 0
2776     {
2777       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
2778       printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
2779       printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
2780              cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
2781              cpi->time_compress_data / 1000,
2782              (cpi->time_receive_data + cpi->time_compress_data) / 1000);
2783     }
2784 #endif
2785   }
2786
2787 #if CONFIG_VP9_TEMPORAL_DENOISING
2788   vp9_denoiser_free(&(cpi->denoiser));
2789 #endif
2790
2791   if (cpi->kmeans_data_arr_alloc) {
2792 #if CONFIG_MULTITHREAD
2793     pthread_mutex_destroy(&cpi->kmeans_mutex);
2794 #endif
2795     vpx_free(cpi->kmeans_data_arr);
2796   }
2797
2798   free_tpl_buffer(cpi);
2799
2800   for (t = 0; t < cpi->num_workers; ++t) {
2801     VPxWorker *const worker = &cpi->workers[t];
2802     EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
2803
2804     // Deallocate allocated threads.
2805     vpx_get_worker_interface()->end(worker);
2806
2807     // Deallocate allocated thread data.
2808     if (t < cpi->num_workers - 1) {
2809       vpx_free(thread_data->td->counts);
2810       vp9_free_pc_tree(thread_data->td);
2811       vpx_free(thread_data->td);
2812     }
2813   }
2814   vpx_free(cpi->tile_thr_data);
2815   vpx_free(cpi->workers);
2816   vp9_row_mt_mem_dealloc(cpi);
2817
2818   if (cpi->num_workers > 1) {
2819     vp9_loop_filter_dealloc(&cpi->lf_row_sync);
2820     vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
2821   }
2822
2823 #if !CONFIG_REALTIME_ONLY
2824   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
2825 #endif
2826
2827   dealloc_compressor_data(cpi);
2828
2829   for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
2830        ++i) {
2831     vpx_free(cpi->mbgraph_stats[i].mb_stats);
2832   }
2833
2834 #if CONFIG_FP_MB_STATS
2835   if (cpi->use_fp_mb_stats) {
2836     vpx_free(cpi->twopass.frame_mb_stats_buf);
2837     cpi->twopass.frame_mb_stats_buf = NULL;
2838   }
2839 #endif
2840
2841   vp9_extrc_delete(&cpi->ext_ratectrl);
2842
2843   vp9_remove_common(cm);
2844   vp9_free_ref_frame_buffers(cm->buffer_pool);
2845 #if CONFIG_VP9_POSTPROC
2846   vp9_free_postproc_buffers(cm);
2847 #endif
2848   vpx_free(cpi);
2849
2850 #if CONFIG_VP9_TEMPORAL_DENOISING
2851 #ifdef OUTPUT_YUV_DENOISED
2852   fclose(yuv_denoised_file);
2853 #endif
2854 #endif
2855 #ifdef OUTPUT_YUV_SKINMAP
2856   fclose(yuv_skinmap_file);
2857 #endif
2858 #ifdef OUTPUT_YUV_REC
2859   fclose(yuv_rec_file);
2860 #endif
2861 #ifdef OUTPUT_YUV_SVC_SRC
2862   fclose(yuv_svc_src[0]);
2863   fclose(yuv_svc_src[1]);
2864   fclose(yuv_svc_src[2]);
2865 #endif
2866
2867 #if 0
2868
2869   if (keyfile)
2870     fclose(keyfile);
2871
2872   if (framepsnr)
2873     fclose(framepsnr);
2874
2875   if (kf_list)
2876     fclose(kf_list);
2877
2878 #endif
2879 }
2880
2881 int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
2882   if (is_psnr_calc_enabled(cpi)) {
2883 #if CONFIG_VP9_HIGHBITDEPTH
2884     vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
2885                          cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
2886 #else
2887     vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
2888 #endif
2889     return 1;
2890   } else {
2891     vp9_zero(*psnr);
2892     return 0;
2893   }
2894 }
2895
2896 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
2897   if (ref_frame_flags > 7) return -1;
2898
2899   cpi->ref_frame_flags = ref_frame_flags;
2900   return 0;
2901 }
2902
2903 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
2904   cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
2905   cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
2906   cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
2907   cpi->ext_refresh_frame_flags_pending = 1;
2908 }
2909
2910 static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
2911     VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
2912   MV_REFERENCE_FRAME ref_frame = NONE;
2913   if (ref_frame_flag == VP9_LAST_FLAG)
2914     ref_frame = LAST_FRAME;
2915   else if (ref_frame_flag == VP9_GOLD_FLAG)
2916     ref_frame = GOLDEN_FRAME;
2917   else if (ref_frame_flag == VP9_ALT_FLAG)
2918     ref_frame = ALTREF_FRAME;
2919
2920   return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
2921 }
2922
2923 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2924                            YV12_BUFFER_CONFIG *sd) {
2925   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2926   if (cfg) {
2927     vpx_yv12_copy_frame(cfg, sd);
2928     return 0;
2929   } else {
2930     return -1;
2931   }
2932 }
2933
2934 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2935                           YV12_BUFFER_CONFIG *sd) {
2936   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2937   if (cfg) {
2938     vpx_yv12_copy_frame(sd, cfg);
2939     return 0;
2940   } else {
2941     return -1;
2942   }
2943 }
2944
2945 int vp9_update_entropy(VP9_COMP *cpi, int update) {
2946   cpi->ext_refresh_frame_context = update;
2947   cpi->ext_refresh_frame_context_pending = 1;
2948   return 0;
2949 }
2950
#ifdef OUTPUT_YUV_REC
// Debug helper: appends the Y, U and V planes of cm->frame_to_show, row by
// row, to yuv_rec_file and flushes it. The luma plane is written for
// cm->height rows; the chroma planes for s->uv_height rows. High bit depth
// frames are written with two bytes per sample.
void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
  YV12_BUFFER_CONFIG *s = cm->frame_to_show;
  uint8_t *const plane_bufs[3] = { s->y_buffer, s->u_buffer, s->v_buffer };
  const int plane_rows[3] = { cm->height, s->uv_height, s->uv_height };
  const int plane_cols[3] = { s->y_width, s->uv_width, s->uv_width };
  const int plane_strides[3] = { s->y_stride, s->uv_stride, s->uv_stride };
  int plane;

#if CONFIG_VP9_HIGHBITDEPTH
  if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (plane = 0; plane < 3; ++plane) {
      uint16_t *row16 = CONVERT_TO_SHORTPTR(plane_bufs[plane]);
      int rows_left = plane_rows[plane];

      do {
        fwrite(row16, plane_cols[plane], 2, yuv_rec_file);
        row16 += plane_strides[plane];
      } while (--rows_left);
    }

    fflush(yuv_rec_file);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (plane = 0; plane < 3; ++plane) {
    uint8_t *row = plane_bufs[plane];
    int rows_left = plane_rows[plane];

    do {
      fwrite(row, plane_cols[plane], 1, yuv_rec_file);
      row += plane_strides[plane];
    } while (--rows_left);
  }

  fflush(yuv_rec_file);
}
#endif
3011
3012 #if CONFIG_VP9_HIGHBITDEPTH
3013 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3014                                                 YV12_BUFFER_CONFIG *dst,
3015                                                 int bd) {
3016 #else
3017 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3018                                                 YV12_BUFFER_CONFIG *dst) {
3019 #endif  // CONFIG_VP9_HIGHBITDEPTH
3020   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
3021   int i;
3022   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
3023                                    src->v_buffer };
3024   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
3025   const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
3026                               src->uv_crop_width };
3027   const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
3028                                src->uv_crop_height };
3029   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
3030   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
3031   const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
3032                               dst->uv_crop_width };
3033   const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
3034                                dst->uv_crop_height };
3035
3036   for (i = 0; i < MAX_MB_PLANE; ++i) {
3037 #if CONFIG_VP9_HIGHBITDEPTH
3038     if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
3039       vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
3040                               src_strides[i], dsts[i], dst_heights[i],
3041                               dst_widths[i], dst_strides[i], bd);
3042     } else {
3043       vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3044                        dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3045     }
3046 #else
3047     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3048                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3049 #endif  // CONFIG_VP9_HIGHBITDEPTH
3050   }
3051   vpx_extend_frame_borders(dst);
3052 }
3053
#if CONFIG_VP9_HIGHBITDEPTH
// Scales |src| into |dst| with the interpolation kernel selected by
// |filter_type|, then extends the borders of |dst|. The destination is walked
// in 16x16 luma-sized steps; planes 1 and 2 use half the coordinates and half
// the block size (factor 2), i.e. the chroma planes are treated as subsampled
// by two in both directions. |phase_scaler| is added to the 1/16-sample source
// position of every block; only the low four bits of that position are passed
// on as the sub-sample phase.
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                   YV12_BUFFER_CONFIG *dst, int bd,
                                   INTERP_FILTER filter_type,
                                   int phase_scaler) {
  const int in_w = src->y_crop_width;
  const int in_h = src->y_crop_height;
  const int out_w = dst->y_crop_width;
  const int out_h = dst->y_crop_height;
  const uint8_t *const in_planes[3] = { src->y_buffer, src->u_buffer,
                                        src->v_buffer };
  const int in_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
  uint8_t *const out_planes[3] = { dst->y_buffer, dst->u_buffer,
                                   dst->v_buffer };
  const int out_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
  const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
  int plane;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int factor = (plane == 0 || plane == 3) ? 1 : 2;
    const int blk = 16 / factor;
    const int in_stride = in_strides[plane];
    const int out_stride = out_strides[plane];
    int x, y;

    for (y = 0; y < out_h; y += 16) {
      // The divisions by out_h / out_w stay inside the loops that guarantee
      // the divisor is positive.
      const int y_q4 = y * blk * in_h / out_h + phase_scaler;
      const int y_step_q4 = 16 * in_h / out_h;
      const int row = y / factor;

      for (x = 0; x < out_w; x += 16) {
        const int x_q4 = x * blk * in_w / out_w + phase_scaler;
        const int x_step_q4 = 16 * in_w / out_w;
        const int col = x / factor;
        const uint8_t *src_ptr = in_planes[plane] +
                                 row * in_h / out_h * in_stride +
                                 col * in_w / out_w;
        uint8_t *dst_ptr = out_planes[plane] + row * out_stride + col;

        if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
          vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), in_stride,
                               CONVERT_TO_SHORTPTR(dst_ptr), out_stride, kernel,
                               x_q4 & 0xf, x_step_q4, y_q4 & 0xf, y_step_q4,
                               blk, blk, bd);
        } else {
          vpx_scaled_2d(src_ptr, in_stride, dst_ptr, out_stride, kernel,
                        x_q4 & 0xf, x_step_q4, y_q4 & 0xf, y_step_q4, blk,
                        blk);
        }
      }
    }
  }

  vpx_extend_frame_borders(dst);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
3102
3103 #if !CONFIG_REALTIME_ONLY
3104 static int scale_down(VP9_COMP *cpi, int q) {
3105   RATE_CONTROL *const rc = &cpi->rc;
3106   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3107   int scale = 0;
3108   assert(frame_is_kf_gf_arf(cpi));
3109
3110   if (rc->frame_size_selector == UNSCALED &&
3111       q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
3112     const int max_size_thresh =
3113         (int)(rate_thresh_mult[SCALE_STEP1] *
3114               VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
3115     scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
3116   }
3117   return scale;
3118 }
3119
3120 static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
3121   const RATE_CONTROL *const rc = &cpi->rc;
3122   int big_miss_high;
3123
3124   if (frame_is_kf_gf_arf(cpi))
3125     big_miss_high = rc->this_frame_target * 3 / 2;
3126   else
3127     big_miss_high = rc->this_frame_target * 2;
3128
3129   return big_miss_high;
3130 }
3131
3132 static int big_rate_miss(VP9_COMP *cpi) {
3133   const RATE_CONTROL *const rc = &cpi->rc;
3134   int big_miss_high;
3135   int big_miss_low;
3136
3137   // Ignore for overlay frames
3138   if (rc->is_src_frame_alt_ref) {
3139     return 0;
3140   } else {
3141     big_miss_low = (rc->this_frame_target / 2);
3142     big_miss_high = big_rate_miss_high_threshold(cpi);
3143
3144     return (rc->projected_frame_size > big_miss_high) ||
3145            (rc->projected_frame_size < big_miss_low);
3146   }
3147 }
3148
3149 // test in two pass for the first
3150 static int two_pass_first_group_inter(VP9_COMP *cpi) {
3151   if (cpi->oxcf.pass == 2) {
3152     TWO_PASS *const twopass = &cpi->twopass;
3153     GF_GROUP *const gf_group = &twopass->gf_group;
3154     const int gfg_index = gf_group->index;
3155
3156     if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
3157     return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
3158            gf_group->update_type[gfg_index] == LF_UPDATE;
3159   } else {
3160     return 0;
3161   }
3162 }
3163
3164 // Function to test for conditions that indicate we should loop
3165 // back and recode a frame.
3166 static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
3167                             int maxq, int minq) {
3168   const RATE_CONTROL *const rc = &cpi->rc;
3169   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
3170   const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
3171   int force_recode = 0;
3172
3173   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3174       big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) ||
3175       (two_pass_first_group_inter(cpi) &&
3176        (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) ||
3177       (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) {
3178     if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
3179         scale_down(cpi, q)) {
3180       // Code this group at a lower resolution.
3181       cpi->resize_pending = 1;
3182       return 1;
3183     }
3184
3185     // Force recode for extreme overshoot.
3186     if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3187         (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
3188          rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) {
3189       return 1;
3190     }
3191
3192     // TODO(agrange) high_limit could be greater than the scale-down threshold.
3193     if ((rc->projected_frame_size > high_limit && q < maxq) ||
3194         (rc->projected_frame_size < low_limit && q > minq)) {
3195       force_recode = 1;
3196     } else if (cpi->oxcf.rc_mode == VPX_CQ) {
3197       // Deal with frame undershoot and whether or not we are
3198       // below the automatically set cq level.
3199       if (q > oxcf->cq_level &&
3200           rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
3201         force_recode = 1;
3202       }
3203     }
3204   }
3205   return force_recode;
3206 }
3207 #endif  // !CONFIG_REALTIME_ONLY
3208
3209 static void update_ref_frames(VP9_COMP *cpi) {
3210   VP9_COMMON *const cm = &cpi->common;
3211   BufferPool *const pool = cm->buffer_pool;
3212   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3213
3214   if (cpi->rc.show_arf_as_gld) {
3215     int tmp = cpi->alt_fb_idx;
3216     cpi->alt_fb_idx = cpi->gld_fb_idx;
3217     cpi->gld_fb_idx = tmp;
3218   } else if (cm->show_existing_frame) {
3219     // Pop ARF.
3220     cpi->lst_fb_idx = cpi->alt_fb_idx;
3221     cpi->alt_fb_idx =
3222         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3223     --gf_group->stack_size;
3224   }
3225
3226   // At this point the new frame has been encoded.
3227   // If any buffer copy / swapping is signaled it should be done here.
3228   if (cm->frame_type == KEY_FRAME) {
3229     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3230                cm->new_fb_idx);
3231     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3232                cm->new_fb_idx);
3233   } else if (vp9_preserve_existing_gf(cpi)) {
3234     // We have decided to preserve the previously existing golden frame as our
3235     // new ARF frame. However, in the short term in function
3236     // vp9_get_refresh_mask() we left it in the GF slot and, if
3237     // we're updating the GF with the current decoded frame, we save it to the
3238     // ARF slot instead.
3239     // We now have to update the ARF with the current frame and swap gld_fb_idx
3240     // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
3241     // slot and, if we're updating the GF, the current frame becomes the new GF.
3242     int tmp;
3243
3244     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3245                cm->new_fb_idx);
3246
3247     tmp = cpi->alt_fb_idx;
3248     cpi->alt_fb_idx = cpi->gld_fb_idx;
3249     cpi->gld_fb_idx = tmp;
3250   } else { /* For non key/golden frames */
3251     if (cpi->refresh_alt_ref_frame) {
3252       int arf_idx = gf_group->top_arf_idx;
3253
3254       // Push new ARF into stack.
3255       stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
3256                  gf_group->stack_size);
3257       ++gf_group->stack_size;
3258
3259       assert(arf_idx < REF_FRAMES);
3260
3261       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
3262       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
3263              cpi->interp_filter_selected[0],
3264              sizeof(cpi->interp_filter_selected[0]));
3265
3266       cpi->alt_fb_idx = arf_idx;
3267     }
3268
3269     if (cpi->refresh_golden_frame) {
3270       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3271                  cm->new_fb_idx);
3272       if (!cpi->rc.is_src_frame_alt_ref)
3273         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3274                cpi->interp_filter_selected[0],
3275                sizeof(cpi->interp_filter_selected[0]));
3276       else
3277         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3278                cpi->interp_filter_selected[ALTREF_FRAME],
3279                sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
3280     }
3281   }
3282
3283   if (cpi->refresh_last_frame) {
3284     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
3285                cm->new_fb_idx);
3286     if (!cpi->rc.is_src_frame_alt_ref)
3287       memcpy(cpi->interp_filter_selected[LAST_FRAME],
3288              cpi->interp_filter_selected[0],
3289              sizeof(cpi->interp_filter_selected[0]));
3290   }
3291
3292   if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
3293     cpi->alt_fb_idx =
3294         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3295     --gf_group->stack_size;
3296   }
3297 }
3298
3299 void vp9_update_reference_frames(VP9_COMP *cpi) {
3300   update_ref_frames(cpi);
3301
3302 #if CONFIG_VP9_TEMPORAL_DENOISING
3303   vp9_denoiser_update_ref_frame(cpi);
3304 #endif
3305
3306   if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
3307 }
3308
3309 static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
3310   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
3311   struct loopfilter *lf = &cm->lf;
3312   int is_reference_frame =
3313       (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
3314        cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
3315   if (cpi->use_svc &&
3316       cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
3317     is_reference_frame = !cpi->svc.non_reference_frame;
3318
3319   // Skip loop filter in show_existing_frame mode.
3320   if (cm->show_existing_frame) {
3321     lf->filter_level = 0;
3322     return;
3323   }
3324
3325   if (cpi->loopfilter_ctrl == NO_LOOPFILTER ||
3326       (!is_reference_frame && cpi->loopfilter_ctrl == LOOPFILTER_REFERENCE)) {
3327     lf->filter_level = 0;
3328     vpx_extend_frame_inner_borders(cm->frame_to_show);
3329     return;
3330   }
3331
3332   if (xd->lossless) {
3333     lf->filter_level = 0;
3334     lf->last_filt_level = 0;
3335   } else {
3336     struct vpx_usec_timer timer;
3337
3338     vpx_clear_system_state();
3339
3340     vpx_usec_timer_start(&timer);
3341
3342     if (!cpi->rc.is_src_frame_alt_ref) {
3343       if ((cpi->common.frame_type == KEY_FRAME) &&
3344           (!cpi->rc.this_key_frame_forced)) {
3345         lf->last_filt_level = 0;
3346       }
3347       vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
3348       lf->last_filt_level = lf->filter_level;
3349     } else {
3350       lf->filter_level = 0;
3351     }
3352
3353     vpx_usec_timer_mark(&timer);
3354     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
3355   }
3356
3357   if (lf->filter_level > 0 && is_reference_frame) {
3358     vp9_build_mask_frame(cm, lf->filter_level, 0);
3359
3360     if (cpi->num_workers > 1)
3361       vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
3362                                lf->filter_level, 0, 0, cpi->workers,
3363                                cpi->num_workers, &cpi->lf_row_sync);
3364     else
3365       vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
3366   }
3367
3368   vpx_extend_frame_inner_borders(cm->frame_to_show);
3369 }
3370
3371 static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
3372   RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
3373   if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
3374       new_fb_ptr->mi_cols < cm->mi_cols) {
3375     vpx_free(new_fb_ptr->mvs);
3376     CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
3377                     (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
3378                                          sizeof(*new_fb_ptr->mvs)));
3379     new_fb_ptr->mi_rows = cm->mi_rows;
3380     new_fb_ptr->mi_cols = cm->mi_cols;
3381   }
3382 }
3383
3384 void vp9_scale_references(VP9_COMP *cpi) {
3385   VP9_COMMON *cm = &cpi->common;
3386   MV_REFERENCE_FRAME ref_frame;
3387   const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG,
3388                                      VP9_ALT_FLAG };
3389
3390   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3391     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
3392     if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
3393       BufferPool *const pool = cm->buffer_pool;
3394       const YV12_BUFFER_CONFIG *const ref =
3395           get_ref_frame_buffer(cpi, ref_frame);
3396
3397       if (ref == NULL) {
3398         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3399         continue;
3400       }
3401
3402 #if CONFIG_VP9_HIGHBITDEPTH
3403       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3404         RefCntBuffer *new_fb_ptr = NULL;
3405         int force_scaling = 0;
3406         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3407         if (new_fb == INVALID_IDX) {
3408           new_fb = get_free_fb(cm);
3409           force_scaling = 1;
3410         }
3411         if (new_fb == INVALID_IDX) return;
3412         new_fb_ptr = &pool->frame_bufs[new_fb];
3413         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3414             new_fb_ptr->buf.y_crop_height != cm->height) {
3415           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3416                                        cm->subsampling_x, cm->subsampling_y,
3417                                        cm->use_highbitdepth,
3418                                        VP9_ENC_BORDER_IN_PIXELS,
3419                                        cm->byte_alignment, NULL, NULL, NULL))
3420             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3421                                "Failed to allocate frame buffer");
3422           scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth,
3423                                  EIGHTTAP, 0);
3424           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3425           alloc_frame_mvs(cm, new_fb);
3426         }
3427 #else
3428       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3429         RefCntBuffer *new_fb_ptr = NULL;
3430         int force_scaling = 0;
3431         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3432         if (new_fb == INVALID_IDX) {
3433           new_fb = get_free_fb(cm);
3434           force_scaling = 1;
3435         }
3436         if (new_fb == INVALID_IDX) return;
3437         new_fb_ptr = &pool->frame_bufs[new_fb];
3438         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3439             new_fb_ptr->buf.y_crop_height != cm->height) {
3440           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3441                                        cm->subsampling_x, cm->subsampling_y,
3442                                        VP9_ENC_BORDER_IN_PIXELS,
3443                                        cm->byte_alignment, NULL, NULL, NULL))
3444             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3445                                "Failed to allocate frame buffer");
3446           vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
3447           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3448           alloc_frame_mvs(cm, new_fb);
3449         }
3450 #endif  // CONFIG_VP9_HIGHBITDEPTH
3451       } else {
3452         int buf_idx;
3453         RefCntBuffer *buf = NULL;
3454         if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3455           // Check for release of scaled reference.
3456           buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
3457           if (buf_idx != INVALID_IDX) {
3458             buf = &pool->frame_bufs[buf_idx];
3459             --buf->ref_count;
3460             cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3461           }
3462         }
3463         buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3464         buf = &pool->frame_bufs[buf_idx];
3465         buf->buf.y_crop_width = ref->y_crop_width;
3466         buf->buf.y_crop_height = ref->y_crop_height;
3467         cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
3468         ++buf->ref_count;
3469       }
3470     } else {
3471       if (cpi->oxcf.pass != 0 || cpi->use_svc)
3472         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3473     }
3474   }
3475 }
3476
3477 static void release_scaled_references(VP9_COMP *cpi) {
3478   VP9_COMMON *cm = &cpi->common;
3479   int i;
3480   if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3481     // Only release scaled references under certain conditions:
3482     // if reference will be updated, or if scaled reference has same resolution.
3483     int refresh[3];
3484     refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
3485     refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
3486     refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
3487     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3488       const int idx = cpi->scaled_ref_idx[i - 1];
3489       if (idx != INVALID_IDX) {
3490         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3491         const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
3492         if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
3493                                buf->buf.y_crop_height == ref->y_crop_height)) {
3494           --buf->ref_count;
3495           cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
3496         }
3497       }
3498     }
3499   } else {
3500     for (i = 0; i < REFS_PER_FRAME; ++i) {
3501       const int idx = cpi->scaled_ref_idx[i];
3502       if (idx != INVALID_IDX) {
3503         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3504         --buf->ref_count;
3505         cpi->scaled_ref_idx[i] = INVALID_IDX;
3506       }
3507     }
3508   }
3509 }
3510
3511 static void full_to_model_count(unsigned int *model_count,
3512                                 unsigned int *full_count) {
3513   int n;
3514   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
3515   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
3516   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
3517   for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
3518     model_count[TWO_TOKEN] += full_count[n];
3519   model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
3520 }
3521
3522 static void full_to_model_counts(vp9_coeff_count_model *model_count,
3523                                  vp9_coeff_count *full_count) {
3524   int i, j, k, l;
3525
3526   for (i = 0; i < PLANE_TYPES; ++i)
3527     for (j = 0; j < REF_TYPES; ++j)
3528       for (k = 0; k < COEF_BANDS; ++k)
3529         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
3530           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
3531 }
3532
3533 #if 0 && CONFIG_INTERNAL_STATS
3534 static void output_frame_level_debug_stats(VP9_COMP *cpi) {
3535   VP9_COMMON *const cm = &cpi->common;
3536   FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
3537   int64_t recon_err;
3538
3539   vpx_clear_system_state();
3540
3541 #if CONFIG_VP9_HIGHBITDEPTH
3542   if (cm->use_highbitdepth) {
3543     recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3544   } else {
3545     recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3546   }
3547 #else
3548   recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3549 #endif  // CONFIG_VP9_HIGHBITDEPTH
3550
3551
3552   if (cpi->twopass.total_left_stats.coded_error != 0.0) {
3553     double dc_quant_devisor;
3554 #if CONFIG_VP9_HIGHBITDEPTH
3555     switch (cm->bit_depth) {
3556       case VPX_BITS_8:
3557         dc_quant_devisor = 4.0;
3558         break;
3559       case VPX_BITS_10:
3560         dc_quant_devisor = 16.0;
3561         break;
3562       default:
3563         assert(cm->bit_depth == VPX_BITS_12);
3564         dc_quant_devisor = 64.0;
3565         break;
3566     }
3567 #else
3568     dc_quant_devisor = 4.0;
3569 #endif
3570
3571     if (!cm->current_video_frame) {
3572       fprintf(f, "frame, width, height, last ts, last end ts, "
3573           "source_alt_ref_pending, source_alt_ref_active, "
3574           "this_frame_target, projected_frame_size, "
3575           "projected_frame_size / MBs, "
3576           "projected_frame_size - this_frame_target, "
3577           "vbr_bits_off_target, vbr_bits_off_target_fast, "
3578           "twopass.extend_minq, twopass.extend_minq_fast, "
3579           "total_target_vs_actual, "
3580           "starting_buffer_level - bits_off_target, "
3581           "total_actual_bits, base_qindex, q for base_qindex, "
3582           "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
3583           "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
3584           "frame_type, gfu_boost, "
3585           "twopass.bits_left, "
3586           "twopass.total_left_stats.coded_error, "
3587           "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
3588           "tot_recode_hits, recon_err, kf_boost, "
3589           "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
3590           "filter_level, seg.aq_av_offset\n");
3591     }
3592
3593     fprintf(f, "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
3594         "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
3595         "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
3596         "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
3597         "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
3598         cpi->common.current_video_frame,
3599         cm->width, cm->height,
3600         cpi->last_time_stamp_seen,
3601         cpi->last_end_time_stamp_seen,
3602         cpi->rc.source_alt_ref_pending,
3603         cpi->rc.source_alt_ref_active,
3604         cpi->rc.this_frame_target,
3605         cpi->rc.projected_frame_size,
3606         cpi->rc.projected_frame_size / cpi->common.MBs,
3607         (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
3608         cpi->rc.vbr_bits_off_target,
3609         cpi->rc.vbr_bits_off_target_fast,
3610         cpi->twopass.extend_minq,
3611         cpi->twopass.extend_minq_fast,
3612         cpi->rc.total_target_vs_actual,
3613         (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
3614         cpi->rc.total_actual_bits, cm->base_qindex,
3615         vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
3616         (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
3617             dc_quant_devisor,
3618         vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
3619                                 cm->bit_depth),
3620         cpi->rc.avg_q,
3621         vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
3622         cpi->refresh_last_frame, cpi->refresh_golden_frame,
3623         cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
3624         cpi->twopass.bits_left,
3625         cpi->twopass.total_left_stats.coded_error,
3626         cpi->twopass.bits_left /
3627             (1 + cpi->twopass.total_left_stats.coded_error),
3628         cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
3629         cpi->twopass.kf_zeromotion_pct,
3630         cpi->twopass.fr_content_type,
3631         cm->lf.filter_level,
3632         cm->seg.aq_av_offset);
3633   }
3634   fclose(f);
3635
3636   if (0) {
3637     FILE *const fmodes = fopen("Modes.stt", "a");
3638     int i;
3639
3640     fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
3641             cm->frame_type, cpi->refresh_golden_frame,
3642             cpi->refresh_alt_ref_frame);
3643
3644     for (i = 0; i < MAX_MODES; ++i)
3645       fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
3646
3647     fprintf(fmodes, "\n");
3648
3649     fclose(fmodes);
3650   }
3651 }
3652 #endif
3653
3654 static void set_mv_search_params(VP9_COMP *cpi) {
3655   const VP9_COMMON *const cm = &cpi->common;
3656   const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
3657
3658   // Default based on max resolution.
3659   cpi->mv_step_param = vp9_init_search_range(max_mv_def);
3660
3661   if (cpi->sf.mv.auto_mv_step_size) {
3662     if (frame_is_intra_only(cm)) {
3663       // Initialize max_mv_magnitude for use in the first INTER frame
3664       // after a key/intra-only frame.
3665       cpi->max_mv_magnitude = max_mv_def;
3666     } else {
3667       if (cm->show_frame) {
3668         // Allow mv_steps to correspond to twice the max mv magnitude found
3669         // in the previous frame, capped by the default max_mv_magnitude based
3670         // on resolution.
3671         cpi->mv_step_param = vp9_init_search_range(
3672             VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
3673       }
3674       cpi->max_mv_magnitude = 0;
3675     }
3676   }
3677 }
3678
3679 static void set_size_independent_vars(VP9_COMP *cpi) {
3680   vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
3681   vp9_set_rd_speed_thresholds(cpi);
3682   vp9_set_rd_speed_thresholds_sub8x8(cpi);
3683   cpi->common.interp_filter = cpi->sf.default_interp_filter;
3684 }
3685
3686 static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
3687                                     int *top_index) {
3688   VP9_COMMON *const cm = &cpi->common;
3689
3690   // Setup variables that depend on the dimensions of the frame.
3691   vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
3692
3693   // Decide q and q bounds.
3694   *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
3695
3696   if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
3697     *q = cpi->rc.worst_quality;
3698     cpi->rc.force_max_q = 0;
3699   }
3700
3701   if (!frame_is_intra_only(cm)) {
3702     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
3703   }
3704
3705 #if !CONFIG_REALTIME_ONLY
3706   // Configure experimental use of segmentation for enhanced coding of
3707   // static regions if indicated.
3708   // Only allowed in the second pass of a two pass encode, as it requires
3709   // lagged coding, and if the relevant speed feature flag is set.
3710   if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
3711     configure_static_seg_features(cpi);
3712 #endif  // !CONFIG_REALTIME_ONLY
3713
3714 #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
3715   if (cpi->oxcf.noise_sensitivity > 0) {
3716     int l = 0;
3717     switch (cpi->oxcf.noise_sensitivity) {
3718       case 1: l = 20; break;
3719       case 2: l = 40; break;
3720       case 3: l = 60; break;
3721       case 4:
3722       case 5: l = 100; break;
3723       case 6: l = 150; break;
3724     }
3725     if (!cpi->common.postproc_state.limits) {
3726       cpi->common.postproc_state.limits =
3727           vpx_calloc(cpi->un_scaled_source->y_width,
3728                      sizeof(*cpi->common.postproc_state.limits));
3729     }
3730     vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
3731                 cpi->common.postproc_state.limits);
3732   }
3733 #endif  // CONFIG_VP9_POSTPROC
3734 }
3735
3736 static void init_motion_estimation(VP9_COMP *cpi) {
3737   int y_stride = cpi->scaled_source.y_stride;
3738
3739   if (cpi->sf.mv.search_method == NSTEP) {
3740     vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
3741   } else if (cpi->sf.mv.search_method == DIAMOND) {
3742     vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
3743   }
3744 }
3745
3746 static void set_frame_size(VP9_COMP *cpi) {
3747   int ref_frame;
3748   VP9_COMMON *const cm = &cpi->common;
3749   VP9EncoderConfig *const oxcf = &cpi->oxcf;
3750   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
3751
3752 #if !CONFIG_REALTIME_ONLY
3753   if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
3754       ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
3755        (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
3756     calculate_coded_size(cpi, &oxcf->scaled_frame_width,
3757                          &oxcf->scaled_frame_height);
3758
3759     // There has been a change in frame size.
3760     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3761                          oxcf->scaled_frame_height);
3762   }
3763 #endif  // !CONFIG_REALTIME_ONLY
3764
3765   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
3766       oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
3767     // For SVC scaled width/height will have been set (svc->resize_set=1)
3768     // in get_svc_params based on the layer width/height.
3769     if (!cpi->use_svc || !cpi->svc.resize_set) {
3770       oxcf->scaled_frame_width =
3771           (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
3772       oxcf->scaled_frame_height =
3773           (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den;
3774       // There has been a change in frame size.
3775       vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3776                            oxcf->scaled_frame_height);
3777     }
3778
3779     // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
3780     set_mv_search_params(cpi);
3781
3782     vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
3783 #if CONFIG_VP9_TEMPORAL_DENOISING
3784     // Reset the denoiser on the resized frame.
3785     if (cpi->oxcf.noise_sensitivity > 0) {
3786       vp9_denoiser_free(&(cpi->denoiser));
3787       setup_denoiser_buffer(cpi);
3788       // Dynamic resize is only triggered for non-SVC, so we can force
3789       // golden frame update here as temporary fix to denoiser.
3790       cpi->refresh_golden_frame = 1;
3791     }
3792 #endif
3793   }
3794
3795   if ((oxcf->pass == 2) && !cpi->use_svc) {
3796     vp9_set_target_rate(cpi);
3797   }
3798
3799   alloc_frame_mvs(cm, cm->new_fb_idx);
3800
3801   // Reset the frame pointers to the current frame size.
3802   if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
3803                                cm->subsampling_x, cm->subsampling_y,
3804 #if CONFIG_VP9_HIGHBITDEPTH
3805                                cm->use_highbitdepth,
3806 #endif
3807                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
3808                                NULL, NULL, NULL))
3809     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3810                        "Failed to allocate frame buffer");
3811
3812   alloc_util_frame_buffers(cpi);
3813   init_motion_estimation(cpi);
3814
3815   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3816     RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
3817     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3818
3819     ref_buf->idx = buf_idx;
3820
3821     if (buf_idx != INVALID_IDX) {
3822       YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
3823       ref_buf->buf = buf;
3824 #if CONFIG_VP9_HIGHBITDEPTH
3825       vp9_setup_scale_factors_for_frame(
3826           &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
3827           cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
3828 #else
3829       vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
3830                                         buf->y_crop_height, cm->width,
3831                                         cm->height);
3832 #endif  // CONFIG_VP9_HIGHBITDEPTH
3833       if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
3834     } else {
3835       ref_buf->buf = NULL;
3836     }
3837   }
3838
3839   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
3840 }
3841
3842 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3843 static void save_encode_params(VP9_COMP *cpi) {
3844   VP9_COMMON *const cm = &cpi->common;
3845   const int tile_cols = 1 << cm->log2_tile_cols;
3846   const int tile_rows = 1 << cm->log2_tile_rows;
3847   int tile_col, tile_row;
3848   int i, j;
3849   RD_OPT *rd_opt = &cpi->rd;
3850   for (i = 0; i < MAX_REF_FRAMES; i++) {
3851     for (j = 0; j < REFERENCE_MODES; j++)
3852       rd_opt->prediction_type_threshes_prev[i][j] =
3853           rd_opt->prediction_type_threshes[i][j];
3854
3855     for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
3856       rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
3857   }
3858
3859   if (cpi->tile_data != NULL) {
3860     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
3861       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
3862         TileDataEnc *tile_data =
3863             &cpi->tile_data[tile_row * tile_cols + tile_col];
3864         for (i = 0; i < BLOCK_SIZES; ++i) {
3865           for (j = 0; j < MAX_MODES; ++j) {
3866             tile_data->thresh_freq_fact_prev[i][j] =
3867                 tile_data->thresh_freq_fact[i][j];
3868           }
3869         }
3870       }
3871   }
3872 }
3873 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3874
3875 static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
3876 #ifdef ENABLE_KF_DENOISE
3877   if (is_spatial_denoise_enabled(cpi)) {
3878     cpi->raw_source_frame = vp9_scale_if_required(
3879         cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3880         (oxcf->pass == 0), EIGHTTAP, 0);
3881   } else {
3882     cpi->raw_source_frame = cpi->Source;
3883   }
3884 #else
3885   cpi->raw_source_frame = cpi->Source;
3886 #endif
3887 }
3888
3889 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
3890                                       uint8_t *dest) {
3891   VP9_COMMON *const cm = &cpi->common;
3892   SVC *const svc = &cpi->svc;
3893   int q = 0, bottom_index = 0, top_index = 0;
3894   int no_drop_scene_change = 0;
3895   const INTERP_FILTER filter_scaler =
3896       (is_one_pass_cbr_svc(cpi))
3897           ? svc->downsample_filter_type[svc->spatial_layer_id]
3898           : EIGHTTAP;
3899   const int phase_scaler =
3900       (is_one_pass_cbr_svc(cpi))
3901           ? svc->downsample_filter_phase[svc->spatial_layer_id]
3902           : 0;
3903
3904   if (cm->show_existing_frame) {
3905     cpi->rc.this_frame_target = 0;
3906     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
3907     return 1;
3908   }
3909
3910   svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
3911
3912   // Flag to check if its valid to compute the source sad (used for
3913   // scene detection and for superblock content state in CBR mode).
3914   // The flag may get reset below based on SVC or resizing state.
3915   cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;
3916
3917   vpx_clear_system_state();
3918
3919   set_frame_size(cpi);
3920
3921   if (is_one_pass_cbr_svc(cpi) &&
3922       cpi->un_scaled_source->y_width == cm->width << 2 &&
3923       cpi->un_scaled_source->y_height == cm->height << 2 &&
3924       svc->scaled_temp.y_width == cm->width << 1 &&
3925       svc->scaled_temp.y_height == cm->height << 1) {
3926     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
3927     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
3928     // result will be saved in scaled_temp and might be used later.
3929     const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
3930     const int phase_scaler2 = svc->downsample_filter_phase[1];
3931     cpi->Source = vp9_svc_twostage_scale(
3932         cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
3933         filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
3934     svc->scaled_one_half = 1;
3935   } else if (is_one_pass_cbr_svc(cpi) &&
3936              cpi->un_scaled_source->y_width == cm->width << 1 &&
3937              cpi->un_scaled_source->y_height == cm->height << 1 &&
3938              svc->scaled_one_half) {
3939     // If the spatial layer is 1/2x1/2 and the scaling is already done in the
3940     // two-stage scaling, use the result directly.
3941     cpi->Source = &svc->scaled_temp;
3942     svc->scaled_one_half = 0;
3943   } else {
3944     cpi->Source = vp9_scale_if_required(
3945         cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
3946         filter_scaler, phase_scaler);
3947   }
3948 #ifdef OUTPUT_YUV_SVC_SRC
3949   // Write out at most 3 spatial layers.
3950   if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
3951     vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
3952   }
3953 #endif
3954   // Unfiltered raw source used in metrics calculation if the source
3955   // has been filtered.
3956   if (is_psnr_calc_enabled(cpi)) {
3957 #ifdef ENABLE_KF_DENOISE
3958     if (is_spatial_denoise_enabled(cpi)) {
3959       cpi->raw_source_frame = vp9_scale_if_required(
3960           cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3961           (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
3962     } else {
3963       cpi->raw_source_frame = cpi->Source;
3964     }
3965 #else
3966     cpi->raw_source_frame = cpi->Source;
3967 #endif
3968   }
3969
3970   if ((cpi->use_svc &&
3971        (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
3972         svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
3973         svc->current_superframe < 1)) ||
3974       cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
3975       cpi->resize_state != ORIG) {
3976     cpi->compute_source_sad_onepass = 0;
3977     if (cpi->content_state_sb_fd != NULL)
3978       memset(cpi->content_state_sb_fd, 0,
3979              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
3980                  sizeof(*cpi->content_state_sb_fd));
3981   }
3982
3983   // Avoid scaling last_source unless its needed.
3984   // Last source is needed if avg_source_sad() is used, or if
3985   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
3986   // estimation is enabled.
3987   if (cpi->unscaled_last_source != NULL &&
3988       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3989        (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
3990         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
3991        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
3992        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
3993        cpi->compute_source_sad_onepass))
3994     cpi->Last_Source = vp9_scale_if_required(
3995         cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
3996         (cpi->oxcf.pass == 0), EIGHTTAP, 0);
3997
3998   if (cpi->Last_Source == NULL ||
3999       cpi->Last_Source->y_width != cpi->Source->y_width ||
4000       cpi->Last_Source->y_height != cpi->Source->y_height)
4001     cpi->compute_source_sad_onepass = 0;
4002
4003   if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
4004     memset(cpi->consec_zero_mv, 0,
4005            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
4006   }
4007
4008 #if CONFIG_VP9_TEMPORAL_DENOISING
4009   if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
4010     vp9_denoiser_reset_on_first_frame(cpi);
4011 #endif
4012
4013   // Scene detection is always used for VBR mode or screen-content case.
4014   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
4015   // (need to check encoding time cost for doing this for speed 8).
4016   cpi->rc.high_source_sad = 0;
4017   cpi->rc.hybrid_intra_scene_change = 0;
4018   cpi->rc.re_encode_maxq_scene_change = 0;
4019   if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
4020       (cpi->oxcf.rc_mode == VPX_VBR ||
4021        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
4022        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
4023     vp9_scene_detection_onepass(cpi);
4024
4025   if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
4026     svc->high_source_sad_superframe = cpi->rc.high_source_sad;
4027     svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
4028     // On scene change reset temporal layer pattern to TL0.
4029     // Note that if the base/lower spatial layers are skipped: instead of
4030     // inserting base layer here, we force max-q for the next superframe
4031     // with lower spatial layers: this is done in vp9_encodedframe_overshoot()
4032     // when max-q is decided for the current layer.
4033     // Only do this reset for bypass/flexible mode.
4034     if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
4035         svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
4036       // rc->high_source_sad will get reset so copy it to restore it.
4037       int tmp_high_source_sad = cpi->rc.high_source_sad;
4038       vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
4039       cpi->rc.high_source_sad = tmp_high_source_sad;
4040     }
4041   }
4042
4043   vp9_update_noise_estimate(cpi);
4044
4045   // For 1 pass CBR, check if we are dropping this frame.
4046   // Never drop on key frame, if base layer is key for svc,
4047   // on scene change, or if superframe has layer sync.
4048   if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
4049       !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
4050     no_drop_scene_change = 1;
4051   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
4052       !frame_is_intra_only(cm) && !no_drop_scene_change &&
4053       !svc->superframe_has_layer_sync &&
4054       (!cpi->use_svc ||
4055        !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
4056     if (vp9_rc_drop_frame(cpi)) return 0;
4057   }
4058
4059   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
4060   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
4061   // avoid this frame-level upsampling (for non intra_only frames).
4062   // For SVC single_layer mode, dynamic resize is allowed and we need to
4063   // scale references for this case.
4064   if (frame_is_intra_only(cm) == 0 &&
4065       ((svc->single_layer_svc && cpi->oxcf.resize_mode == RESIZE_DYNAMIC) ||
4066        !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref))) {
4067     vp9_scale_references(cpi);
4068   }
4069
4070   set_size_independent_vars(cpi);
4071   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4072
4073   // search method and step parameter might be changed in speed settings.
4074   init_motion_estimation(cpi);
4075
4076   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
4077
4078   if (cpi->sf.svc_use_lowres_part &&
4079       svc->spatial_layer_id == svc->number_spatial_layers - 2) {
4080     if (svc->prev_partition_svc == NULL) {
4081       CHECK_MEM_ERROR(
4082           cm, svc->prev_partition_svc,
4083           (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
4084                                    sizeof(*svc->prev_partition_svc)));
4085     }
4086   }
4087
4088   // TODO(jianj): Look into issue of skin detection with high bitdepth.
4089   if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
4090       cpi->oxcf.rc_mode == VPX_CBR &&
4091       cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
4092       cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4093     cpi->use_skin_detection = 1;
4094   }
4095
4096   // Enable post encode frame dropping for CBR on non key frame, when
4097   // ext_use_post_encode_drop is specified by user.
4098   cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
4099                                  cpi->oxcf.rc_mode == VPX_CBR &&
4100                                  cm->frame_type != KEY_FRAME;
4101
4102   vp9_set_quantizer(cpi, q);
4103   vp9_set_variance_partition_thresholds(cpi, q, 0);
4104
4105   setup_frame(cpi);
4106
4107   suppress_active_map(cpi);
4108
4109   if (cpi->use_svc) {
4110     // On non-zero spatial layer, check for disabling inter-layer
4111     // prediction.
4112     if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
4113     vp9_svc_assert_constraints_pattern(cpi);
4114   }
4115
4116   if (cpi->rc.last_post_encode_dropped_scene_change) {
4117     cpi->rc.high_source_sad = 1;
4118     svc->high_source_sad_superframe = 1;
4119     // For now disable use_source_sad since Last_Source will not be the previous
4120     // encoded but the dropped one.
4121     cpi->sf.use_source_sad = 0;
4122     cpi->rc.last_post_encode_dropped_scene_change = 0;
4123   }
4124   // Check if this high_source_sad (scene/slide change) frame should be
4125   // encoded at high/max QP, and if so, set the q and adjust some rate
4126   // control parameters.
4127   if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
4128       (cpi->rc.high_source_sad ||
4129        (cpi->use_svc && svc->high_source_sad_superframe))) {
4130     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
4131       vp9_set_quantizer(cpi, q);
4132       vp9_set_variance_partition_thresholds(cpi, q, 0);
4133     }
4134   }
4135
4136 #if !CONFIG_REALTIME_ONLY
4137   // Variance adaptive and in frame q adjustment experiments are mutually
4138   // exclusive.
4139   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
4140     vp9_vaq_frame_setup(cpi);
4141   } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
4142     vp9_360aq_frame_setup(cpi);
4143   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
4144     vp9_setup_in_frame_q_adj(cpi);
4145   } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
4146     // it may be pretty bad for rate-control,
4147     // and I should handle it somehow
4148     vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4149   } else {
4150 #endif
4151     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4152       vp9_cyclic_refresh_setup(cpi);
4153     } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
4154       apply_roi_map(cpi);
4155     }
4156 #if !CONFIG_REALTIME_ONLY
4157   }
4158 #endif
4159
4160   apply_active_map(cpi);
4161
4162   vp9_encode_frame(cpi);
4163
4164   // Check if we should re-encode this frame at high Q because of high
4165   // overshoot based on the encoded frame size. Only for frames where
4166   // high temporal-source SAD is detected.
4167   // For SVC: all spatial layers are checked for re-encoding.
4168   if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
4169       (cpi->rc.high_source_sad ||
4170        (cpi->use_svc && svc->high_source_sad_superframe))) {
4171     int frame_size = 0;
4172     // Get an estimate of the encoded frame size.
4173     save_coding_context(cpi);
4174     vp9_pack_bitstream(cpi, dest, size);
4175     restore_coding_context(cpi);
4176     frame_size = (int)(*size) << 3;
4177     // Check if encoded frame will overshoot too much, and if so, set the q and
4178     // adjust some rate control parameters, and return to re-encode the frame.
4179     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
4180       vpx_clear_system_state();
4181       vp9_set_quantizer(cpi, q);
4182       vp9_set_variance_partition_thresholds(cpi, q, 0);
4183       suppress_active_map(cpi);
4184       // Turn-off cyclic refresh for re-encoded frame.
4185       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4186         CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
4187         unsigned char *const seg_map = cpi->segmentation_map;
4188         memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
4189         memset(cr->last_coded_q_map, MAXQ,
4190                cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
4191         cr->sb_index = 0;
4192         vp9_disable_segmentation(&cm->seg);
4193       }
4194       apply_active_map(cpi);
4195       vp9_encode_frame(cpi);
4196     }
4197   }
4198
4199   // Update some stats from cyclic refresh, and check for golden frame update.
4200   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
4201       !frame_is_intra_only(cm))
4202     vp9_cyclic_refresh_postencode(cpi);
4203
4204   // Update the skip mb flag probabilities based on the distribution
4205   // seen in the last encoder iteration.
4206   // update_base_skip_probs(cpi);
4207   vpx_clear_system_state();
4208   return 1;
4209 }
4210
4211 static int get_ref_frame_flags(const VP9_COMP *cpi) {
4212   const int *const map = cpi->common.ref_frame_map;
4213   const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
4214   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
4215   const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
4216   int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
4217
4218   if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
4219
4220   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
4221       (cpi->svc.number_temporal_layers == 1 &&
4222        cpi->svc.number_spatial_layers == 1))
4223     flags &= ~VP9_GOLD_FLAG;
4224
4225   if (alt_is_last) flags &= ~VP9_ALT_FLAG;
4226
4227   if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
4228
4229   return flags;
4230 }
4231
4232 #if !CONFIG_REALTIME_ONLY
#define MAX_QSTEP_ADJ 4
// Returns rate_excess / rate_limit rounded by adding half of rate_limit
// before the (truncating) division, capped at MAX_QSTEP_ADJ.
// A zero rate_limit yields MAX_QSTEP_ADJ.
static int get_qstep_adj(int rate_excess, int rate_limit) {
  int steps;
  if (rate_limit == 0) return MAX_QSTEP_ADJ;
  steps = (rate_excess + rate_limit / 2) / rate_limit;
  return (steps < MAX_QSTEP_ADJ) ? steps : MAX_QSTEP_ADJ;
}
4239
4240 #if CONFIG_RATE_CTRL
4241 static void init_rq_history(RATE_QINDEX_HISTORY *rq_history) {
4242   rq_history->recode_count = 0;
4243   rq_history->q_index_high = 255;
4244   rq_history->q_index_low = 0;
4245 }
4246
4247 static void update_rq_history(RATE_QINDEX_HISTORY *rq_history, int target_bits,
4248                               int actual_bits, int q_index) {
4249   rq_history->q_index_history[rq_history->recode_count] = q_index;
4250   rq_history->rate_history[rq_history->recode_count] = actual_bits;
4251   if (actual_bits <= target_bits) {
4252     rq_history->q_index_high = q_index;
4253   }
4254   if (actual_bits >= target_bits) {
4255     rq_history->q_index_low = q_index;
4256   }
4257   rq_history->recode_count += 1;
4258 }
4259
4260 static int guess_q_index_from_model(const RATE_QSTEP_MODEL *rq_model,
4261                                     int target_bits) {
4262   // The model predicts bits as follows.
4263   // target_bits = bias - ratio * log2(q_step)
4264   // Given the target_bits, we compute the q_step as follows.
4265   double q_step;
4266   assert(rq_model->ratio > 0);
4267   q_step = pow(2.0, (rq_model->bias - target_bits) / rq_model->ratio);
4268   // TODO(angiebird): Make this function support highbitdepth.
4269   return vp9_convert_q_to_qindex(q_step, VPX_BITS_8);
4270 }
4271
// Moves prev_q_index by a fixed gap: down (floored at 0) when the frame came
// in under target, otherwise up (capped at 255).
static int guess_q_index_linear(int prev_q_index, int target_bits,
                                int actual_bits, int gap) {
  if (actual_bits < target_bits) {
    const int lowered = prev_q_index - gap;
    return (lowered > 0) ? lowered : 0;
  } else {
    const int raised = prev_q_index + gap;
    return (raised < 255) ? raised : 255;
  }
}
4284
// Returns |target_bits - actual_bits| as a percentage of target_bits.
// target_bits is raised to at least 1 so the division is always defined.
static double get_bits_percent_diff(int target_bits, int actual_bits) {
  double diff;
  if (target_bits < 1) target_bits = 1;
  // Subtract in double: the int subtraction can overflow (undefined
  // behavior) when the two values are far apart.
  diff = (double)target_bits - (double)actual_bits;
  if (diff < 0) diff = -diff;
  return diff / target_bits * 100;
}
4291
4292 static int rq_model_predict_q_index(const RATE_QSTEP_MODEL *rq_model,
4293                                     const RATE_QINDEX_HISTORY *rq_history,
4294                                     int target_bits) {
4295   int q_index = 128;
4296   if (rq_history->recode_count > 0) {
4297     const int actual_bits =
4298         rq_history->rate_history[rq_history->recode_count - 1];
4299     const int prev_q_index =
4300         rq_history->q_index_history[rq_history->recode_count - 1];
4301     const double percent_diff = get_bits_percent_diff(target_bits, actual_bits);
4302     if (percent_diff > 50) {
4303       // Binary search.
4304       // When the actual_bits and target_bits are far apart, binary search
4305       // q_index is faster.
4306       q_index = (rq_history->q_index_low + rq_history->q_index_high) / 2;
4307     } else {
4308       if (rq_model->ready) {
4309         q_index = guess_q_index_from_model(rq_model, target_bits);
4310       } else {
4311         // TODO(angiebird): Find a better way to set the gap.
4312         q_index =
4313             guess_q_index_linear(prev_q_index, target_bits, actual_bits, 20);
4314       }
4315     }
4316   } else {
4317     if (rq_model->ready) {
4318       q_index = guess_q_index_from_model(rq_model, target_bits);
4319     }
4320   }
4321
4322   assert(rq_history->q_index_low <= rq_history->q_index_high);
4323   if (q_index <= rq_history->q_index_low) {
4324     q_index = rq_history->q_index_low + 1;
4325   }
4326   if (q_index >= rq_history->q_index_high) {
4327     q_index = rq_history->q_index_high - 1;
4328   }
4329   return q_index;
4330 }
4331
4332 static void rq_model_update(const RATE_QINDEX_HISTORY *rq_history,
4333                             int target_bits, RATE_QSTEP_MODEL *rq_model) {
4334   const int recode_count = rq_history->recode_count;
4335   const double delta = 0.00001;
4336   if (recode_count >= 2) {
4337     const int q_index1 = rq_history->q_index_history[recode_count - 2];
4338     const int q_index2 = rq_history->q_index_history[recode_count - 1];
4339     const int r1 = rq_history->rate_history[recode_count - 2];
4340     const int r2 = rq_history->rate_history[recode_count - 1];
4341     int valid = 0;
4342     // lower q_index should yield higher bit rate
4343     if (q_index1 < q_index2) {
4344       valid = r1 > r2;
4345     } else if (q_index1 > q_index2) {
4346       valid = r1 < r2;
4347     }
4348     // Only update the model when the q_index and rate behave normally.
4349     if (valid) {
4350       // Fit the ratio and bias of rq_model based on last two recode histories.
4351       const double s1 = vp9_convert_qindex_to_q(q_index1, VPX_BITS_8);
4352       const double s2 = vp9_convert_qindex_to_q(q_index2, VPX_BITS_8);
4353       if (fabs(log2(s1) - log2(s2)) > delta) {
4354         rq_model->ratio = (r2 - r1) / (log2(s1) - log2(s2));
4355         rq_model->bias = r1 + (rq_model->ratio) * log2(s1);
4356         if (rq_model->ratio > delta && rq_model->bias > delta) {
4357           rq_model->ready = 1;
4358         }
4359       }
4360     }
4361   } else if (recode_count == 1) {
4362     if (rq_model->ready) {
4363       // Update the ratio only when the initial model exists and we only have
4364       // one recode history.
4365       const int prev_q = rq_history->q_index_history[recode_count - 1];
4366       const double prev_q_step = vp9_convert_qindex_to_q(prev_q, VPX_BITS_8);
4367       if (fabs(log2(prev_q_step)) > delta) {
4368         const int actual_bits = rq_history->rate_history[recode_count - 1];
4369         rq_model->ratio =
4370             rq_model->ratio + (target_bits - actual_bits) / log2(prev_q_step);
4371       }
4372     }
4373   }
4374 }
4375 #endif  // CONFIG_RATE_CTRL
4376
4377 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest
4378 #if CONFIG_RATE_CTRL
4379                                     ,
4380                                     RATE_QINDEX_HISTORY *rq_history
4381 #endif  // CONFIG_RATE_CTRL
4382 ) {
4383   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4384   VP9_COMMON *const cm = &cpi->common;
4385   RATE_CONTROL *const rc = &cpi->rc;
4386   int bottom_index, top_index;
4387   int loop_count = 0;
4388   int loop_at_this_size = 0;
4389   int loop = 0;
4390   int overshoot_seen = 0;
4391   int undershoot_seen = 0;
4392   int frame_over_shoot_limit;
4393   int frame_under_shoot_limit;
4394   int q = 0, q_low = 0, q_high = 0;
4395   int enable_acl;
4396 #ifdef AGGRESSIVE_VBR
4397   int qrange_adj = 1;
4398 #endif
4399
4400 #if CONFIG_RATE_CTRL
4401   const FRAME_UPDATE_TYPE update_type =
4402       cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
4403   const ENCODE_FRAME_TYPE frame_type = get_encode_frame_type(update_type);
4404   RATE_QSTEP_MODEL *rq_model = &cpi->rq_model[frame_type];
4405   init_rq_history(rq_history);
4406 #endif  // CONFIG_RATE_CTRL
4407
4408   if (cm->show_existing_frame) {
4409     rc->this_frame_target = 0;
4410     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
4411     return;
4412   }
4413
4414   set_size_independent_vars(cpi);
4415
4416   enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
4417                                        (cpi->twopass.gf_group.index == 1)
4418                                  : 0;
4419
4420   do {
4421     vpx_clear_system_state();
4422
4423     set_frame_size(cpi);
4424
4425     if (loop_count == 0 || cpi->resize_pending != 0) {
4426       set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4427
4428 #ifdef AGGRESSIVE_VBR
4429       if (two_pass_first_group_inter(cpi)) {
4430         // Adjustment limits for min and max q
4431         qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2);
4432
4433         bottom_index =
4434             VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q);
4435         top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2);
4436       }
4437 #endif
4438       // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
4439       set_mv_search_params(cpi);
4440
4441       // Reset the loop state for new frame size.
4442       overshoot_seen = 0;
4443       undershoot_seen = 0;
4444
4445       // Reconfiguration for change in frame size has concluded.
4446       cpi->resize_pending = 0;
4447
4448       q_low = bottom_index;
4449       q_high = top_index;
4450
4451       loop_at_this_size = 0;
4452     }
4453
4454 #if CONFIG_RATE_CTRL
4455     if (cpi->encode_command.use_external_target_frame_bits) {
4456       q = rq_model_predict_q_index(rq_model, rq_history, rc->this_frame_target);
4457     }
4458 #endif  // CONFIG_RATE_CTRL
4459     // Decide frame size bounds first time through.
4460     if (loop_count == 0) {
4461       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
4462                                        &frame_under_shoot_limit,
4463                                        &frame_over_shoot_limit);
4464     }
4465
4466     cpi->Source =
4467         vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
4468                               (oxcf->pass == 0), EIGHTTAP, 0);
4469
4470     // Unfiltered raw source used in metrics calculation if the source
4471     // has been filtered.
4472     if (is_psnr_calc_enabled(cpi)) {
4473 #ifdef ENABLE_KF_DENOISE
4474       if (is_spatial_denoise_enabled(cpi)) {
4475         cpi->raw_source_frame = vp9_scale_if_required(
4476             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
4477             (oxcf->pass == 0), EIGHTTAP, 0);
4478       } else {
4479         cpi->raw_source_frame = cpi->Source;
4480       }
4481 #else
4482       cpi->raw_source_frame = cpi->Source;
4483 #endif
4484     }
4485
4486     if (cpi->unscaled_last_source != NULL)
4487       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
4488                                                &cpi->scaled_last_source,
4489                                                (oxcf->pass == 0), EIGHTTAP, 0);
4490
4491     if (frame_is_intra_only(cm) == 0) {
4492       if (loop_count > 0) {
4493         release_scaled_references(cpi);
4494       }
4495       vp9_scale_references(cpi);
4496     }
4497
4498 #if CONFIG_RATE_CTRL
4499     // TODO(angiebird): This is a hack for making sure the encoder use the
4500     // external_quantize_index exactly. Avoid this kind of hack later.
4501     if (cpi->encode_command.use_external_quantize_index) {
4502       q = cpi->encode_command.external_quantize_index;
4503     }
4504 #endif
4505     if (cpi->ext_ratectrl.ready) {
4506       const GF_GROUP *gf_group = &cpi->twopass.gf_group;
4507       vpx_rc_encodeframe_decision_t encode_frame_decision;
4508       FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];
4509       const int ref_frame_flags = get_ref_frame_flags(cpi);
4510       RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
4511       get_ref_frame_bufs(cpi, ref_frame_bufs);
4512       vp9_extrc_get_encodeframe_decision(
4513           &cpi->ext_ratectrl, cm->current_video_frame,
4514           cm->current_frame_coding_index, update_type, ref_frame_bufs,
4515           ref_frame_flags, &encode_frame_decision);
4516       q = encode_frame_decision.q_index;
4517     }
4518
4519     vp9_set_quantizer(cpi, q);
4520
4521     if (loop_count == 0) setup_frame(cpi);
4522
4523     // Variance adaptive and in frame q adjustment experiments are mutually
4524     // exclusive.
4525     if (oxcf->aq_mode == VARIANCE_AQ) {
4526       vp9_vaq_frame_setup(cpi);
4527     } else if (oxcf->aq_mode == EQUATOR360_AQ) {
4528       vp9_360aq_frame_setup(cpi);
4529     } else if (oxcf->aq_mode == COMPLEXITY_AQ) {
4530       vp9_setup_in_frame_q_adj(cpi);
4531     } else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
4532       vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4533     } else if (oxcf->aq_mode == PSNR_AQ) {
4534       vp9_psnr_aq_mode_setup(&cm->seg);
4535     }
4536
4537     vp9_encode_frame(cpi);
4538
4539     // Update the skip mb flag probabilities based on the distribution
4540     // seen in the last encoder iteration.
4541     // update_base_skip_probs(cpi);
4542
4543     vpx_clear_system_state();
4544
4545     // Dummy pack of the bitstream using up to date stats to get an
4546     // accurate estimate of output frame size to determine if we need
4547     // to recode.
4548     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4549       save_coding_context(cpi);
4550       if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
4551
4552       rc->projected_frame_size = (int)(*size) << 3;
4553
4554       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
4555     }
4556
4557     if (cpi->ext_ratectrl.ready) {
4558       break;
4559     }
4560 #if CONFIG_RATE_CTRL
4561     // This part needs to be after save_coding_context() because
4562     // restore_coding_context will be called in the end of this function.
4563     // TODO(angiebird): This is a hack for making sure the encoder use the
4564     // external_quantize_index exactly. Avoid this kind of hack later.
4565     if (cpi->encode_command.use_external_quantize_index) {
4566       break;
4567     }
4568
4569     if (cpi->encode_command.use_external_target_frame_bits) {
4570       const double percent_diff = get_bits_percent_diff(
4571           rc->this_frame_target, rc->projected_frame_size);
4572       update_rq_history(rq_history, rc->this_frame_target,
4573                         rc->projected_frame_size, q);
4574       loop_count += 1;
4575
4576       rq_model_update(rq_history, rc->this_frame_target, rq_model);
4577
4578       // Check if we hit the target bitrate.
4579       if (percent_diff <= cpi->encode_command.target_frame_bits_error_percent ||
4580           rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM ||
4581           rq_history->q_index_low >= rq_history->q_index_high) {
4582         break;
4583       }
4584
4585       loop = 1;
4586       restore_coding_context(cpi);
4587       continue;
4588     }
4589 #endif  // CONFIG_RATE_CTRL
4590
4591     if (oxcf->rc_mode == VPX_Q) {
4592       loop = 0;
4593     } else {
4594       if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
4595           (rc->projected_frame_size < rc->max_frame_bandwidth)) {
4596         int last_q = q;
4597         int64_t kf_err;
4598
4599         int64_t high_err_target = cpi->ambient_err;
4600         int64_t low_err_target = cpi->ambient_err >> 1;
4601
4602 #if CONFIG_VP9_HIGHBITDEPTH
4603         if (cm->use_highbitdepth) {
4604           kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4605         } else {
4606           kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4607         }
4608 #else
4609         kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4610 #endif  // CONFIG_VP9_HIGHBITDEPTH
4611
4612         // Prevent possible divide by zero error below for perfect KF
4613         kf_err += !kf_err;
4614
4615         // The key frame is not good enough or we can afford
4616         // to make it better without undue risk of popping.
4617         if ((kf_err > high_err_target &&
4618              rc->projected_frame_size <= frame_over_shoot_limit) ||
4619             (kf_err > low_err_target &&
4620              rc->projected_frame_size <= frame_under_shoot_limit)) {
4621           // Lower q_high
4622           q_high = q > q_low ? q - 1 : q_low;
4623
4624           // Adjust Q
4625           q = (int)((q * high_err_target) / kf_err);
4626           q = VPXMIN(q, (q_high + q_low) >> 1);
4627         } else if (kf_err < low_err_target &&
4628                    rc->projected_frame_size >= frame_under_shoot_limit) {
4629           // The key frame is much better than the previous frame
4630           // Raise q_low
4631           q_low = q < q_high ? q + 1 : q_high;
4632
4633           // Adjust Q
4634           q = (int)((q * low_err_target) / kf_err);
4635           q = VPXMIN(q, (q_high + q_low + 1) >> 1);
4636         }
4637
4638         // Clamp Q to upper and lower limits:
4639         q = clamp(q, q_low, q_high);
4640
4641         loop = q != last_q;
4642       } else if (recode_loop_test(cpi, frame_over_shoot_limit,
4643                                   frame_under_shoot_limit, q,
4644                                   VPXMAX(q_high, top_index), bottom_index)) {
4645         // Is the projected frame size out of range and are we allowed
4646         // to attempt to recode.
4647         int last_q = q;
4648         int retries = 0;
4649         int qstep;
4650
4651         if (cpi->resize_pending == 1) {
4652           // Change in frame size so go back around the recode loop.
4653           cpi->rc.frame_size_selector =
4654               SCALE_STEP1 - cpi->rc.frame_size_selector;
4655           cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
4656
4657 #if CONFIG_INTERNAL_STATS
4658           ++cpi->tot_recode_hits;
4659 #endif
4660           ++loop_count;
4661           loop = 1;
4662           continue;
4663         }
4664
4665         // Frame size out of permitted range:
4666         // Update correction factor & compute new Q to try...
4667
4668         // Frame is too large
4669         if (rc->projected_frame_size > rc->this_frame_target) {
4670           // Special case if the projected size is > the max allowed.
4671           if ((q == q_high) &&
4672               ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
4673                (!rc->is_src_frame_alt_ref &&
4674                 (rc->projected_frame_size >=
4675                  big_rate_miss_high_threshold(cpi))))) {
4676             int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
4677                                             big_rate_miss_high_threshold(cpi)));
4678             double q_val_high;
4679             q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
4680             q_val_high =
4681                 q_val_high * ((double)rc->projected_frame_size / max_rate);
4682             q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
4683             q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
4684           }
4685
4686           // Raise Qlow as to at least the current value
4687           qstep =
4688               get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
4689           q_low = VPXMIN(q + qstep, q_high);
4690
4691           if (undershoot_seen || loop_at_this_size > 1) {
4692             // Update rate_correction_factor unless
4693             vp9_rc_update_rate_correction_factors(cpi);
4694
4695             q = (q_high + q_low + 1) / 2;
4696           } else {
4697             // Update rate_correction_factor unless
4698             vp9_rc_update_rate_correction_factors(cpi);
4699
4700             q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4701                                   VPXMAX(q_high, top_index));
4702
4703             while (q < q_low && retries < 10) {
4704               vp9_rc_update_rate_correction_factors(cpi);
4705               q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4706                                     VPXMAX(q_high, top_index));
4707               retries++;
4708             }
4709           }
4710
4711           overshoot_seen = 1;
4712         } else {
4713           // Frame is too small
4714           qstep =
4715               get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
4716           q_high = VPXMAX(q - qstep, q_low);
4717
4718           if (overshoot_seen || loop_at_this_size > 1) {
4719             vp9_rc_update_rate_correction_factors(cpi);
4720             q = (q_high + q_low) / 2;
4721           } else {
4722             vp9_rc_update_rate_correction_factors(cpi);
4723             q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4724                                   VPXMIN(q_low, bottom_index), top_index);
4725             // Special case reset for qlow for constrained quality.
4726             // This should only trigger where there is very substantial
4727             // undershoot on a frame and the auto cq level is above
4728             // the user passed in value.
4729             if (oxcf->rc_mode == VPX_CQ && q < q_low) {
4730               q_low = q;
4731             }
4732
4733             while (q > q_high && retries < 10) {
4734               vp9_rc_update_rate_correction_factors(cpi);
4735               q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4736                                     VPXMIN(q_low, bottom_index), top_index);
4737               retries++;
4738             }
4739           }
4740           undershoot_seen = 1;
4741         }
4742
4743         // Clamp Q to upper and lower limits:
4744         q = clamp(q, q_low, q_high);
4745
4746         loop = (q != last_q);
4747       } else {
4748         loop = 0;
4749       }
4750     }
4751
4752     // Special case for overlay frame.
4753     if (rc->is_src_frame_alt_ref &&
4754         rc->projected_frame_size < rc->max_frame_bandwidth)
4755       loop = 0;
4756
4757     if (loop) {
4758       ++loop_count;
4759       ++loop_at_this_size;
4760
4761 #if CONFIG_INTERNAL_STATS
4762       ++cpi->tot_recode_hits;
4763 #endif
4764     }
4765
4766     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
4767       if (loop) restore_coding_context(cpi);
4768   } while (loop);
4769
4770 #ifdef AGGRESSIVE_VBR
4771   if (two_pass_first_group_inter(cpi)) {
4772     cpi->twopass.active_worst_quality =
4773         VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
4774   } else if (!frame_is_kf_gf_arf(cpi)) {
4775 #else
4776   if (!frame_is_kf_gf_arf(cpi)) {
4777 #endif
4778     // Have we been forced to adapt Q outside the expected range by an extreme
4779     // rate miss. If so adjust the active maxQ for the subsequent frames.
4780     if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
4781       cpi->twopass.active_worst_quality = q;
4782     } else if (oxcf->vbr_corpus_complexity && q == q_low &&
4783                rc->projected_frame_size < rc->this_frame_target) {
4784       cpi->twopass.active_worst_quality =
4785           VPXMAX(q, cpi->twopass.active_worst_quality - 1);
4786     }
4787   }
4788
4789   if (enable_acl) {
4790     // Skip recoding, if model diff is below threshold
4791     const int thresh = compute_context_model_thresh(cpi);
4792     const int diff = compute_context_model_diff(cm);
4793     if (diff >= thresh) {
4794       vp9_encode_frame(cpi);
4795     }
4796   }
4797   if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4798     vpx_clear_system_state();
4799     restore_coding_context(cpi);
4800   }
4801 }
4802 #endif  // !CONFIG_REALTIME_ONLY
4803
4804 static void set_ext_overrides(VP9_COMP *cpi) {
4805   // Overrides the defaults with the externally supplied values with
4806   // vp9_update_reference() and vp9_update_entropy() calls
4807   // Note: The overrides are valid only for the next frame passed
4808   // to encode_frame_to_data_rate() function
4809   if (cpi->ext_refresh_frame_context_pending) {
4810     cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
4811     cpi->ext_refresh_frame_context_pending = 0;
4812   }
4813   if (cpi->ext_refresh_frame_flags_pending) {
4814     cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
4815     cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
4816     cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
4817   }
4818 }
4819
4820 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
4821     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4822     YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
4823     int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) {
4824   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4825       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4826 #if CONFIG_VP9_HIGHBITDEPTH
4827     if (cm->bit_depth == VPX_BITS_8) {
4828       vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4829                                  phase_scaler2);
4830       vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type,
4831                                  phase_scaler);
4832     } else {
4833       scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
4834                              filter_type2, phase_scaler2);
4835       scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
4836                              filter_type, phase_scaler);
4837     }
4838 #else
4839     vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4840                                phase_scaler2);
4841     vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler);
4842 #endif  // CONFIG_VP9_HIGHBITDEPTH
4843     return scaled;
4844   } else {
4845     return unscaled;
4846   }
4847 }
4848
4849 YV12_BUFFER_CONFIG *vp9_scale_if_required(
4850     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4851     int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) {
4852   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4853       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4854 #if CONFIG_VP9_HIGHBITDEPTH
4855     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4856         unscaled->y_height <= (scaled->y_height << 1))
4857       if (cm->bit_depth == VPX_BITS_8)
4858         vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4859       else
4860         scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
4861                                filter_type, phase_scaler);
4862     else
4863       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
4864 #else
4865     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4866         unscaled->y_height <= (scaled->y_height << 1))
4867       vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4868     else
4869       scale_and_extend_frame_nonnormative(unscaled, scaled);
4870 #endif  // CONFIG_VP9_HIGHBITDEPTH
4871     return scaled;
4872   } else {
4873     return unscaled;
4874   }
4875 }
4876
4877 static void set_ref_sign_bias(VP9_COMP *cpi) {
4878   VP9_COMMON *const cm = &cpi->common;
4879   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4880   const int cur_frame_index = ref_buffer->frame_index;
4881   MV_REFERENCE_FRAME ref_frame;
4882
4883   for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
4884     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
4885     const RefCntBuffer *const ref_cnt_buf =
4886         get_ref_cnt_buffer(&cpi->common, buf_idx);
4887     if (ref_cnt_buf) {
4888       cm->ref_frame_sign_bias[ref_frame] =
4889           cur_frame_index < ref_cnt_buf->frame_index;
4890     }
4891   }
4892 }
4893
4894 static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
4895   INTERP_FILTER ifilter;
4896   int ref_total[MAX_REF_FRAMES] = { 0 };
4897   MV_REFERENCE_FRAME ref;
4898   int mask = 0;
4899   if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
4900     return mask;
4901   for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
4902     for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
4903       ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
4904
4905   for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
4906     if ((ref_total[LAST_FRAME] &&
4907          cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
4908         (ref_total[GOLDEN_FRAME] == 0 ||
4909          cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
4910              ref_total[GOLDEN_FRAME]) &&
4911         (ref_total[ALTREF_FRAME] == 0 ||
4912          cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
4913              ref_total[ALTREF_FRAME]))
4914       mask |= 1 << ifilter;
4915   }
4916   return mask;
4917 }
4918
4919 #ifdef ENABLE_KF_DENOISE
// Baseline kernel weights for denoise, stored row by row.
// 3x3 kernel (weights sum to 16).
static uint8_t dn_kernal_3[9] = {
  1, 2, 1,  //
  2, 4, 2,  //
  1, 2, 1   //
};
// 5x5 kernel (weights sum to 32).
static uint8_t dn_kernal_5[25] = {
  1, 1, 1, 1, 1,  //
  1, 1, 2, 1, 1,  //
  1, 2, 4, 2, 1,  //
  1, 1, 2, 1, 1,  //
  1, 1, 1, 1, 1   //
};
4924
4925 static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
4926                                      uint8_t point_weight, int *sum_val,
4927                                      int *sum_weight) {
4928   if (abs(centre_val - data_val) <= thresh) {
4929     *sum_weight += point_weight;
4930     *sum_val += (int)data_val * (int)point_weight;
4931   }
4932 }
4933
4934 static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
4935                                   const int strength) {
4936   int sum_weight = 0;
4937   int sum_val = 0;
4938   int thresh = strength;
4939   int kernal_size = 5;
4940   int half_k_size = 2;
4941   int i, j;
4942   int max_diff = 0;
4943   uint8_t *tmp_ptr;
4944   uint8_t *kernal_ptr;
4945
4946   // Find the maximum deviation from the source point in the locale.
4947   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4948   for (i = 0; i < kernal_size + 2; ++i) {
4949     for (j = 0; j < kernal_size + 2; ++j) {
4950       max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
4951     }
4952     tmp_ptr += stride;
4953   }
4954
4955   // Select the kernel size.
4956   if (max_diff > (strength + (strength >> 1))) {
4957     kernal_size = 3;
4958     half_k_size = 1;
4959     thresh = thresh >> 1;
4960   }
4961   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4962
4963   // Apply the kernel
4964   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4965   for (i = 0; i < kernal_size; ++i) {
4966     for (j = 0; j < kernal_size; ++j) {
4967       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4968                         &sum_val, &sum_weight);
4969       ++kernal_ptr;
4970     }
4971     tmp_ptr += stride;
4972   }
4973
4974   // Update the source value with the new filtered value
4975   *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4976 }
4977
4978 #if CONFIG_VP9_HIGHBITDEPTH
4979 static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
4980                                          const int strength) {
4981   int sum_weight = 0;
4982   int sum_val = 0;
4983   int thresh = strength;
4984   int kernal_size = 5;
4985   int half_k_size = 2;
4986   int i, j;
4987   int max_diff = 0;
4988   uint16_t *tmp_ptr;
4989   uint8_t *kernal_ptr;
4990
4991   // Find the maximum deviation from the source point in the locale.
4992   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4993   for (i = 0; i < kernal_size + 2; ++i) {
4994     for (j = 0; j < kernal_size + 2; ++j) {
4995       max_diff = VPXMAX(max_diff, abs((int)src_ptr - (int)tmp_ptr[j]));
4996     }
4997     tmp_ptr += stride;
4998   }
4999
5000   // Select the kernel size.
5001   if (max_diff > (strength + (strength >> 1))) {
5002     kernal_size = 3;
5003     half_k_size = 1;
5004     thresh = thresh >> 1;
5005   }
5006   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
5007
5008   // Apply the kernel
5009   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
5010   for (i = 0; i < kernal_size; ++i) {
5011     for (j = 0; j < kernal_size; ++j) {
5012       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
5013                         &sum_val, &sum_weight);
5014       ++kernal_ptr;
5015     }
5016     tmp_ptr += stride;
5017   }
5018
5019   // Update the source value with the new filtered value
5020   *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
5021 }
5022 #endif  // CONFIG_VP9_HIGHBITDEPTH
5023
5024 // Apply thresholded spatial noise suppression to a given buffer.
5025 static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
5026                                    const int stride, const int width,
5027                                    const int height, const int strength) {
5028   VP9_COMMON *const cm = &cpi->common;
5029   uint8_t *src_ptr = buffer;
5030   int row;
5031   int col;
5032
5033   for (row = 0; row < height; ++row) {
5034     for (col = 0; col < width; ++col) {
5035 #if CONFIG_VP9_HIGHBITDEPTH
5036       if (cm->use_highbitdepth)
5037         highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]), stride,
5038                                      strength);
5039       else
5040         spatial_denoise_point(&src_ptr[col], stride, strength);
5041 #else
5042       spatial_denoise_point(&src_ptr[col], stride, strength);
5043 #endif  // CONFIG_VP9_HIGHBITDEPTH
5044     }
5045     src_ptr += stride;
5046   }
5047 }
5048
5049 // Apply thresholded spatial noise suppression to source.
5050 static void spatial_denoise_frame(VP9_COMP *cpi) {
5051   YV12_BUFFER_CONFIG *src = cpi->Source;
5052   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5053   TWO_PASS *const twopass = &cpi->twopass;
5054   VP9_COMMON *const cm = &cpi->common;
5055
5056   // Base the filter strength on the current active max Q.
5057   const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
5058                                               cm->bit_depth));
5059   int strength =
5060       VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));
5061
5062   // Denoise each of Y,U and V buffers.
5063   spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
5064                          src->y_height, strength);
5065
5066   strength += (strength >> 1);
5067   spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
5068                          src->uv_height, strength << 1);
5069
5070   spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
5071                          src->uv_height, strength << 1);
5072 }
5073 #endif  // ENABLE_KF_DENOISE
5074
5075 #if !CONFIG_REALTIME_ONLY
5076 static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
5077                                          uint8_t *dest) {
5078   if (cpi->common.seg.enabled)
5079     if (ALT_REF_AQ_PROTECT_GAIN) {
5080       size_t nsize = *size;
5081       int overhead;
5082
5083       // TODO(yuryg): optimize this, as
5084       // we don't really need to repack
5085
5086       save_coding_context(cpi);
5087       vp9_disable_segmentation(&cpi->common.seg);
5088       vp9_pack_bitstream(cpi, dest, &nsize);
5089       restore_coding_context(cpi);
5090
5091       overhead = (int)*size - (int)nsize;
5092
5093       if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
5094         vp9_encode_frame(cpi);
5095       else
5096         vp9_enable_segmentation(&cpi->common.seg);
5097     }
5098 }
5099 #endif
5100
5101 static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
5102   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
5103
5104   if (ref_buffer) {
5105     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5106     ref_buffer->frame_index =
5107         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
5108     ref_buffer->frame_coding_index = cm->current_frame_coding_index;
5109   }
5110 }
5111
5112 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
5113   VP9_COMMON *cm = &cpi->common;
5114   ThreadData *td = &cpi->td;
5115   MACROBLOCK *x = &td->mb;
5116   MACROBLOCKD *xd = &x->e_mbd;
5117   uint8_t *y_buffer = cpi->Source->y_buffer;
5118   const int y_stride = cpi->Source->y_stride;
5119   const int block_size = BLOCK_16X16;
5120
5121   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
5122   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
5123   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
5124   const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
5125   double log_sum = 0.0;
5126   int row, col;
5127
5128   // Loop through each 64x64 block.
5129   for (row = 0; row < num_rows; ++row) {
5130     for (col = 0; col < num_cols; ++col) {
5131       int mi_row, mi_col;
5132       double var = 0.0, num_of_var = 0.0;
5133       const int index = row * num_cols + col;
5134
5135       for (mi_row = row * num_8x8_h;
5136            mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
5137         for (mi_col = col * num_8x8_w;
5138              mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w; ++mi_col) {
5139           struct buf_2d buf;
5140           const int row_offset_y = mi_row << 3;
5141           const int col_offset_y = mi_col << 3;
5142
5143           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
5144           buf.stride = y_stride;
5145
5146           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
5147           // and high bit videos, the variance needs to be divided by 2.0 or
5148           // 64.0 separately.
5149           // TODO(sdeng): need to tune for 12bit videos.
5150 #if CONFIG_VP9_HIGHBITDEPTH
5151           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
5152             var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
5153           else
5154 #endif
5155             var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
5156
5157           num_of_var += 1.0;
5158         }
5159       }
5160       var = var / num_of_var / 64.0;
5161
5162       // Curve fitting with an exponential model on all 16x16 blocks from the
5163       // Midres dataset.
5164       var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
5165       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
5166       log_sum += log(var);
5167     }
5168   }
5169   log_sum = exp(log_sum / (double)(num_rows * num_cols));
5170
5171   for (row = 0; row < num_rows; ++row) {
5172     for (col = 0; col < num_cols; ++col) {
5173       const int index = row * num_cols + col;
5174       cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
5175     }
5176   }
5177
5178   (void)xd;
5179 }
5180
// Process the wiener variance in 16x16 block basis.
//
// qsort() comparator ordering two int elements ascending: returns 1 if
// *elem1 > *elem2, -1 if *elem1 < *elem2 and 0 if they are equal.
//
// NOTE(review): both elements are read as int. set_mb_wiener_variance() sorts
// an array of tran_low_t with this comparator, which is only correct if
// tran_low_t has the size and representation of int -- confirm for every
// build configuration.
static int qsort_comp(const void *elem1, const void *elem2) {
  const int lhs = *(const int *)elem1;
  const int rhs = *(const int *)elem2;
  return (lhs > rhs) - (lhs < rhs);
}
5189
5190 static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
5191   VP9_COMMON *cm = &cpi->common;
5192
5193   if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows &&
5194       cpi->mb_wiener_var_cols >= cm->mb_cols)
5195     return;
5196
5197   vpx_free(cpi->mb_wiener_variance);
5198   cpi->mb_wiener_variance = NULL;
5199
5200   CHECK_MEM_ERROR(
5201       cm, cpi->mb_wiener_variance,
5202       vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
5203   cpi->mb_wiener_var_rows = cm->mb_rows;
5204   cpi->mb_wiener_var_cols = cm->mb_cols;
5205 }
5206
5207 static void set_mb_wiener_variance(VP9_COMP *cpi) {
5208   VP9_COMMON *cm = &cpi->common;
5209   uint8_t *buffer = cpi->Source->y_buffer;
5210   int buf_stride = cpi->Source->y_stride;
5211
5212 #if CONFIG_VP9_HIGHBITDEPTH
5213   ThreadData *td = &cpi->td;
5214   MACROBLOCK *x = &td->mb;
5215   MACROBLOCKD *xd = &x->e_mbd;
5216   DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
5217   DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
5218   uint8_t *zero_pred;
5219 #else
5220   DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
5221 #endif
5222
5223   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
5224   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
5225
5226   int mb_row, mb_col, count = 0;
5227   // Hard coded operating block size
5228   const int block_size = 16;
5229   const int coeff_count = block_size * block_size;
5230   const TX_SIZE tx_size = TX_16X16;
5231
5232 #if CONFIG_VP9_HIGHBITDEPTH
5233   xd->cur_buf = cpi->Source;
5234   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5235     zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
5236     memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);
5237   } else {
5238     zero_pred = zero_pred8;
5239     memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);
5240   }
5241 #else
5242   memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
5243 #endif
5244
5245   cpi->norm_wiener_variance = 0;
5246
5247   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
5248     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
5249       int idx;
5250       int16_t median_val = 0;
5251       uint8_t *mb_buffer =
5252           buffer + mb_row * block_size * buf_stride + mb_col * block_size;
5253       int64_t wiener_variance = 0;
5254
5255 #if CONFIG_VP9_HIGHBITDEPTH
5256       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5257         vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
5258                                   mb_buffer, buf_stride, zero_pred, block_size,
5259                                   xd->bd);
5260         highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5261       } else {
5262         vpx_subtract_block(block_size, block_size, src_diff, block_size,
5263                            mb_buffer, buf_stride, zero_pred, block_size);
5264         wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5265       }
5266 #else
5267       vpx_subtract_block(block_size, block_size, src_diff, block_size,
5268                          mb_buffer, buf_stride, zero_pred, block_size);
5269       wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5270 #endif  // CONFIG_VP9_HIGHBITDEPTH
5271
5272       coeff[0] = 0;
5273       for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]);
5274
5275       qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp);
5276
5277       // Noise level estimation
5278       median_val = coeff[coeff_count / 2];
5279
5280       // Wiener filter
5281       for (idx = 1; idx < coeff_count; ++idx) {
5282         int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
5283         int64_t tmp_coeff = (int64_t)coeff[idx];
5284         if (median_val) {
5285           tmp_coeff = (sqr_coeff * coeff[idx]) /
5286                       (sqr_coeff + (int64_t)median_val * median_val);
5287         }
5288         wiener_variance += tmp_coeff * tmp_coeff;
5289       }
5290       cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
5291           wiener_variance / coeff_count;
5292       cpi->norm_wiener_variance +=
5293           cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col];
5294       ++count;
5295     }
5296   }
5297
5298   if (count) cpi->norm_wiener_variance /= count;
5299   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
5300 }
5301
5302 #if !CONFIG_REALTIME_ONLY
5303 static void update_encode_frame_result(
5304     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
5305     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
5306     RefCntBuffer *ref_frame_buf[MAX_INTER_REF_FRAMES], int quantize_index,
5307     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
5308 #if CONFIG_RATE_CTRL
5309     const PARTITION_INFO *partition_info,
5310     const MOTION_VECTOR_INFO *motion_vector_info,
5311     const TplDepStats *tpl_stats_info,
5312 #endif  // CONFIG_RATE_CTRL
5313     ENCODE_FRAME_RESULT *encode_frame_result);
5314 #endif  // !CONFIG_REALTIME_ONLY
5315
5316 static void encode_frame_to_data_rate(
5317     VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags,
5318     ENCODE_FRAME_RESULT *encode_frame_result) {
5319   VP9_COMMON *const cm = &cpi->common;
5320   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5321   struct segmentation *const seg = &cm->seg;
5322   TX_SIZE t;
5323
5324   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
5325   // No need to set svc.skip_enhancement_layer if whole superframe will be
5326   // dropped.
5327   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
5328       cpi->oxcf.target_bandwidth == 0 &&
5329       !(cpi->svc.framedrop_mode != LAYER_DROP &&
5330         (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
5331          cpi->svc
5332              .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
5333                                                 1]) &&
5334         cpi->svc.drop_spatial_layer[0])) {
5335     cpi->svc.skip_enhancement_layer = 1;
5336     vp9_rc_postencode_update_drop_frame(cpi);
5337     cpi->ext_refresh_frame_flags_pending = 0;
5338     cpi->last_frame_dropped = 1;
5339     cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
5340     cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
5341     vp9_inc_frame_in_layer(cpi);
5342     return;
5343   }
5344
5345   set_ext_overrides(cpi);
5346   vpx_clear_system_state();
5347
5348 #ifdef ENABLE_KF_DENOISE
5349   // Spatial denoise of key frame.
5350   if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
5351 #endif
5352
5353   if (cm->show_existing_frame == 0) {
5354     // Update frame index
5355     set_frame_index(cpi, cm);
5356
5357     // Set the arf sign bias for this frame.
5358     set_ref_sign_bias(cpi);
5359   }
5360
5361   // Set default state for segment based loop filter update flags.
5362   cm->lf.mode_ref_delta_update = 0;
5363
5364   if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
5365     cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);
5366
5367   // Set various flags etc to special state if it is a key frame.
5368   if (frame_is_intra_only(cm)) {
5369     // Reset the loop filter deltas and segmentation map.
5370     vp9_reset_segment_features(&cm->seg);
5371
5372     // If segmentation is enabled force a map update for key frames.
5373     if (seg->enabled) {
5374       seg->update_map = 1;
5375       seg->update_data = 1;
5376     }
5377
5378     // The alternate reference frame cannot be active for a key frame.
5379     cpi->rc.source_alt_ref_active = 0;
5380
5381     cm->error_resilient_mode = oxcf->error_resilient_mode;
5382     cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
5383
5384     // By default, encoder assumes decoder can use prev_mi.
5385     if (cm->error_resilient_mode) {
5386       cm->frame_parallel_decoding_mode = 1;
5387       cm->reset_frame_context = 0;
5388       cm->refresh_frame_context = 0;
5389     } else if (cm->intra_only) {
5390       // Only reset the current context.
5391       cm->reset_frame_context = 2;
5392     }
5393   }
5394
5395   if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);
5396
5397   if (oxcf->aq_mode == PERCEPTUAL_AQ) {
5398     init_mb_wiener_var_buffer(cpi);
5399     set_mb_wiener_variance(cpi);
5400   }
5401
5402   vpx_clear_system_state();
5403
5404 #if CONFIG_INTERNAL_STATS
5405   memset(cpi->mode_chosen_counts, 0,
5406          MAX_MODES * sizeof(*cpi->mode_chosen_counts));
5407 #endif
5408 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5409   // Backup to ensure consistency between recodes
5410   save_encode_params(cpi);
5411 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5412
5413   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
5414     if (!encode_without_recode_loop(cpi, size, dest)) return;
5415   } else {
5416 #if !CONFIG_REALTIME_ONLY
5417 #if CONFIG_RATE_CTRL
5418     encode_with_recode_loop(cpi, size, dest, &encode_frame_result->rq_history);
5419 #else   // CONFIG_RATE_CTRL
5420     encode_with_recode_loop(cpi, size, dest);
5421 #endif  // CONFIG_RATE_CTRL
5422 #endif  // !CONFIG_REALTIME_ONLY
5423   }
5424
5425   // TODO(jingning): When using show existing frame mode, we assume that the
5426   // current ARF will be directly used as the final reconstructed frame. This is
5427   // an encoder control scheme. One could in principle explore other
5428   // possibilities to arrange the reference frame buffer and their coding order.
5429   if (cm->show_existing_frame) {
5430     ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
5431                cm->ref_frame_map[cpi->alt_fb_idx]);
5432   }
5433
5434 #if !CONFIG_REALTIME_ONLY
5435   // Disable segmentation if it decrease rate/distortion ratio
5436   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5437     vp9_try_disable_lookahead_aq(cpi, size, dest);
5438 #endif
5439
5440 #if CONFIG_VP9_TEMPORAL_DENOISING
5441 #ifdef OUTPUT_YUV_DENOISED
5442   if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
5443     vpx_write_yuv_frame(yuv_denoised_file,
5444                         &cpi->denoiser.running_avg_y[INTRA_FRAME]);
5445   }
5446 #endif
5447 #endif
5448 #ifdef OUTPUT_YUV_SKINMAP
5449   if (cpi->common.current_video_frame > 1) {
5450     vp9_output_skin_map(cpi, yuv_skinmap_file);
5451   }
5452 #endif
5453
5454   // Special case code to reduce pulsing when key frames are forced at a
5455   // fixed interval. Note the reconstruction error if it is the frame before
5456   // the force key frame
5457   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
5458 #if CONFIG_VP9_HIGHBITDEPTH
5459     if (cm->use_highbitdepth) {
5460       cpi->ambient_err =
5461           vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5462     } else {
5463       cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5464     }
5465 #else
5466     cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5467 #endif  // CONFIG_VP9_HIGHBITDEPTH
5468   }
5469
5470   // If the encoder forced a KEY_FRAME decision
5471   if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;
5472
5473   cm->frame_to_show = get_frame_new_buffer(cm);
5474   cm->frame_to_show->color_space = cm->color_space;
5475   cm->frame_to_show->color_range = cm->color_range;
5476   cm->frame_to_show->render_width = cm->render_width;
5477   cm->frame_to_show->render_height = cm->render_height;
5478
5479   // Pick the loop filter level for the frame.
5480   loopfilter_frame(cpi, cm);
5481
5482   if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
5483
5484   // build the bitstream
5485   vp9_pack_bitstream(cpi, dest, size);
5486
5487   {
5488     const RefCntBuffer *coded_frame_buf =
5489         get_ref_cnt_buffer(cm, cm->new_fb_idx);
5490     vp9_extrc_update_encodeframe_result(
5491         &cpi->ext_ratectrl, (*size) << 3, cpi->Source, &coded_frame_buf->buf,
5492         cm->bit_depth, cpi->oxcf.input_bit_depth);
5493   }
5494 #if CONFIG_REALTIME_ONLY
5495   (void)encode_frame_result;
5496   assert(encode_frame_result == NULL);
5497 #else  // CONFIG_REALTIME_ONLY
5498   if (encode_frame_result != NULL) {
5499     const int ref_frame_flags = get_ref_frame_flags(cpi);
5500     const RefCntBuffer *coded_frame_buf =
5501         get_ref_cnt_buffer(cm, cm->new_fb_idx);
5502     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
5503     get_ref_frame_bufs(cpi, ref_frame_bufs);
5504     // update_encode_frame_result() depends on twopass.gf_group.index and
5505     // cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and
5506     // cpi->alt_fb_idx are updated for current frame and have
5507     // not been updated for the next frame yet.
5508     // The update locations are as follows.
5509     // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
5510     // for the first frame in the gf_group and is updated for the next frame at
5511     // vp9_twopass_postencode_update().
5512     // 2) cpi->Source is updated at the beginning of vp9_get_compressed_data()
5513     // 3) cm->new_fb_idx is updated at the beginning of
5514     // vp9_get_compressed_data() by get_free_fb(cm).
5515     // 4) cpi->lst_fb_idx/gld_fb_idx/alt_fb_idx will be updated for the next
5516     // frame at vp9_update_reference_frames().
5517     // This function needs to be called before vp9_update_reference_frames().
5518     // TODO(angiebird): Improve the codebase to make the update of frame
5519     // dependent variables more robust.
5520     update_encode_frame_result(
5521         ref_frame_flags,
5522         cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
5523         cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),
5524         cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts,
5525 #if CONFIG_RATE_CTRL
5526         cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info,
5527 #endif  // CONFIG_RATE_CTRL
5528         encode_frame_result);
5529   }
5530 #endif  // CONFIG_REALTIME_ONLY
5531
5532   if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
5533       cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
5534     restore_coding_context(cpi);
5535     return;
5536   }
5537
5538   cpi->last_frame_dropped = 0;
5539   cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
5540   if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
5541     cpi->svc.num_encoded_top_layer++;
5542
5543   // Keep track of the frame buffer index updated/refreshed for the
5544   // current encoded TL0 superframe.
5545   if (cpi->svc.temporal_layer_id == 0) {
5546     if (cpi->refresh_last_frame)
5547       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
5548     else if (cpi->refresh_golden_frame)
5549       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
5550     else if (cpi->refresh_alt_ref_frame)
5551       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
5552   }
5553
5554   if (cm->seg.update_map) update_reference_segmentation_map(cpi);
5555
5556   if (frame_is_intra_only(cm) == 0) {
5557     release_scaled_references(cpi);
5558   }
5559   vp9_update_reference_frames(cpi);
5560
5561   if (!cm->show_existing_frame) {
5562     for (t = TX_4X4; t <= TX_32X32; ++t) {
5563       full_to_model_counts(cpi->td.counts->coef[t],
5564                            cpi->td.rd_counts.coef_counts[t]);
5565     }
5566
5567     if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
5568       if (!frame_is_intra_only(cm)) {
5569         vp9_adapt_mode_probs(cm);
5570         vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
5571       }
5572       vp9_adapt_coef_probs(cm);
5573     }
5574   }
5575
5576   cpi->ext_refresh_frame_flags_pending = 0;
5577
5578   if (cpi->refresh_golden_frame == 1)
5579     cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
5580   else
5581     cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
5582
5583   if (cpi->refresh_alt_ref_frame == 1)
5584     cpi->frame_flags |= FRAMEFLAGS_ALTREF;
5585   else
5586     cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
5587
5588   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
5589
5590   cm->last_frame_type = cm->frame_type;
5591
5592   vp9_rc_postencode_update(cpi, *size);
5593
5594   if (oxcf->pass == 0 && !frame_is_intra_only(cm) &&
5595       (!cpi->use_svc ||
5596        (cpi->use_svc &&
5597         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
5598         cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) {
5599     vp9_compute_frame_low_motion(cpi);
5600   }
5601
5602   *size = VPXMAX(1, *size);
5603
5604 #if 0
5605   output_frame_level_debug_stats(cpi);
5606 #endif
5607
5608   if (cm->frame_type == KEY_FRAME) {
5609     // Tell the caller that the frame was coded as a key frame
5610     *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
5611   } else {
5612     *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
5613   }
5614
5615   // Clear the one shot update flags for segmentation map and mode/ref loop
5616   // filter deltas.
5617   cm->seg.update_map = 0;
5618   cm->seg.update_data = 0;
5619   cm->lf.mode_ref_delta_update = 0;
5620
5621   // keep track of the last coded dimensions
5622   cm->last_width = cm->width;
5623   cm->last_height = cm->height;
5624
5625   // reset to normal state now that we are done.
5626   if (!cm->show_existing_frame) {
5627     cm->last_show_frame = cm->show_frame;
5628     cm->prev_frame = cm->cur_frame;
5629   }
5630
5631   if (cm->show_frame) {
5632     vp9_swap_mi_and_prev_mi(cm);
5633     if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
5634   }
5635   update_frame_indexes(cm, cm->show_frame);
5636
5637   if (cpi->use_svc) {
5638     cpi->svc
5639         .layer_context[cpi->svc.spatial_layer_id *
5640                            cpi->svc.number_temporal_layers +
5641                        cpi->svc.temporal_layer_id]
5642         .last_frame_type = cm->frame_type;
5643     // Reset layer_sync back to 0 for next frame.
5644     cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
5645   }
5646
5647   cpi->force_update_segmentation = 0;
5648
5649 #if !CONFIG_REALTIME_ONLY
5650   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5651     vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
5652 #endif
5653
5654   cpi->svc.previous_frame_is_intra_only = cm->intra_only;
5655   cpi->svc.set_intra_only_frame = 0;
5656 }
5657
5658 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5659                       unsigned int *frame_flags) {
5660   vp9_rc_get_svc_params(cpi);
5661   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5662                             /*encode_frame_result = */ NULL);
5663 }
5664
5665 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5666                         unsigned int *frame_flags) {
5667   if (cpi->oxcf.rc_mode == VPX_CBR) {
5668     vp9_rc_get_one_pass_cbr_params(cpi);
5669   } else {
5670     vp9_rc_get_one_pass_vbr_params(cpi);
5671   }
5672   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5673                             /*encode_frame_result = */ NULL);
5674 }
5675
5676 #if !CONFIG_REALTIME_ONLY
5677 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5678                         unsigned int *frame_flags,
5679                         ENCODE_FRAME_RESULT *encode_frame_result) {
5680   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
5681
5682   if (cpi->common.current_frame_coding_index == 0) {
5683     vp9_extrc_send_firstpass_stats(&cpi->ext_ratectrl,
5684                                    &cpi->twopass.first_pass_info);
5685   }
5686 #if CONFIG_MISMATCH_DEBUG
5687   mismatch_move_frame_idx_w();
5688 #endif
5689   encode_frame_to_data_rate(cpi, size, dest, frame_flags, encode_frame_result);
5690 }
5691 #endif  // !CONFIG_REALTIME_ONLY
5692
5693 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
5694                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
5695                           int64_t end_time) {
5696   VP9_COMMON *const cm = &cpi->common;
5697   struct vpx_usec_timer timer;
5698   int res = 0;
5699   const int subsampling_x = sd->subsampling_x;
5700   const int subsampling_y = sd->subsampling_y;
5701 #if CONFIG_VP9_HIGHBITDEPTH
5702   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
5703 #else
5704   const int use_highbitdepth = 0;
5705 #endif
5706
5707   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
5708 #if CONFIG_VP9_TEMPORAL_DENOISING
5709   setup_denoiser_buffer(cpi);
5710 #endif
5711
5712   alloc_raw_frame_buffers(cpi);
5713
5714   vpx_usec_timer_start(&timer);
5715
5716   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
5717                          use_highbitdepth, frame_flags))
5718     res = -1;
5719   vpx_usec_timer_mark(&timer);
5720   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
5721
5722   if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
5723       (subsampling_x != 1 || subsampling_y != 1)) {
5724     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5725                        "Non-4:2:0 color format requires profile 1 or 3");
5726     res = -1;
5727   }
5728   if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
5729       (subsampling_x == 1 && subsampling_y == 1)) {
5730     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5731                        "4:2:0 color format requires profile 0 or 2");
5732     res = -1;
5733   }
5734
5735   return res;
5736 }
5737
5738 static int frame_is_reference(const VP9_COMP *cpi) {
5739   const VP9_COMMON *cm = &cpi->common;
5740
5741   return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
5742          cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame ||
5743          cm->refresh_frame_context || cm->lf.mode_ref_delta_update ||
5744          cm->seg.update_map || cm->seg.update_data;
5745 }
5746
5747 static void adjust_frame_rate(VP9_COMP *cpi,
5748                               const struct lookahead_entry *source) {
5749   int64_t this_duration;
5750   int step = 0;
5751
5752   if (source->ts_start == cpi->first_time_stamp_ever) {
5753     this_duration = source->ts_end - source->ts_start;
5754     step = 1;
5755   } else {
5756     int64_t last_duration =
5757         cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
5758
5759     this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
5760
5761     // do a step update if the duration changes by 10%
5762     if (last_duration)
5763       step = (int)((this_duration - last_duration) * 10 / last_duration);
5764   }
5765
5766   if (this_duration) {
5767     if (step) {
5768       vp9_new_framerate(cpi, 10000000.0 / this_duration);
5769     } else {
5770       // Average this frame's rate into the last second's average
5771       // frame rate. If we haven't seen 1 second yet, then average
5772       // over the whole interval seen.
5773       const double interval = VPXMIN(
5774           (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
5775       double avg_duration = 10000000.0 / cpi->framerate;
5776       avg_duration *= (interval - avg_duration + this_duration);
5777       avg_duration /= interval;
5778
5779       vp9_new_framerate(cpi, 10000000.0 / avg_duration);
5780     }
5781   }
5782   cpi->last_time_stamp_seen = source->ts_start;
5783   cpi->last_end_time_stamp_seen = source->ts_end;
5784 }
5785
5786 // Returns 0 if this is not an alt ref else the offset of the source frame
5787 // used as the arf midpoint.
5788 static int get_arf_src_index(VP9_COMP *cpi) {
5789   RATE_CONTROL *const rc = &cpi->rc;
5790   int arf_src_index = 0;
5791   if (is_altref_enabled(cpi)) {
5792     if (cpi->oxcf.pass == 2) {
5793       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5794       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
5795         arf_src_index = gf_group->arf_src_offset[gf_group->index];
5796       }
5797     } else if (rc->source_alt_ref_pending) {
5798       arf_src_index = rc->frames_till_gf_update_due;
5799     }
5800   }
5801   return arf_src_index;
5802 }
5803
5804 static void check_src_altref(VP9_COMP *cpi,
5805                              const struct lookahead_entry *source) {
5806   RATE_CONTROL *const rc = &cpi->rc;
5807
5808   if (cpi->oxcf.pass == 2) {
5809     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5810     rc->is_src_frame_alt_ref =
5811         (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
5812   } else {
5813     rc->is_src_frame_alt_ref =
5814         cpi->alt_ref_source && (source == cpi->alt_ref_source);
5815   }
5816
5817   if (rc->is_src_frame_alt_ref) {
5818     // Current frame is an ARF overlay frame.
5819     cpi->alt_ref_source = NULL;
5820
5821     // Don't refresh the last buffer for an ARF overlay frame. It will
5822     // become the GF so preserve last as an alternative prediction option.
5823     cpi->refresh_last_frame = 0;
5824   }
5825 }
5826
5827 #if CONFIG_INTERNAL_STATS
5828 static void adjust_image_stat(double y, double u, double v, double all,
5829                               ImageStat *s) {
5830   s->stat[Y] += y;
5831   s->stat[U] += u;
5832   s->stat[V] += v;
5833   s->stat[ALL] += all;
5834   s->worst = VPXMIN(s->worst, all);
5835 }
5836 #endif  // CONFIG_INTERNAL_STATS
5837
5838 // Adjust the maximum allowable frame size for the target level.
5839 static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) {
5840   RATE_CONTROL *const rc = &cpi->rc;
5841   LevelConstraint *const ls = &cpi->level_constraint;
5842   VP9_COMMON *const cm = &cpi->common;
5843   const double max_cpb_size = ls->max_cpb_size;
5844   vpx_clear_system_state();
5845   rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size);
5846   if (frame_is_intra_only(cm)) {
5847     rc->max_frame_bandwidth =
5848         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5));
5849   } else if (arf_src_index > 0) {
5850     rc->max_frame_bandwidth =
5851         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4));
5852   } else {
5853     rc->max_frame_bandwidth =
5854         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2));
5855   }
5856 }
5857
5858 static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
5859   VP9_COMMON *const cm = &cpi->common;
5860   Vp9LevelInfo *const level_info = &cpi->level_info;
5861   Vp9LevelSpec *const level_spec = &level_info->level_spec;
5862   Vp9LevelStats *const level_stats = &level_info->level_stats;
5863   int i, idx;
5864   uint64_t luma_samples, dur_end;
5865   const uint32_t luma_pic_size = cm->width * cm->height;
5866   const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
5867   LevelConstraint *const level_constraint = &cpi->level_constraint;
5868   const int8_t level_index = level_constraint->level_index;
5869   double cpb_data_size;
5870
5871   vpx_clear_system_state();
5872
5873   // update level_stats
5874   level_stats->total_compressed_size += *size;
5875   if (cm->show_frame) {
5876     level_stats->total_uncompressed_size +=
5877         luma_pic_size +
5878         2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
5879     level_stats->time_encoded =
5880         (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
5881         (double)TICKS_PER_SEC;
5882   }
5883
5884   if (arf_src_index > 0) {
5885     if (!level_stats->seen_first_altref) {
5886       level_stats->seen_first_altref = 1;
5887     } else if (level_stats->frames_since_last_altref <
5888                level_spec->min_altref_distance) {
5889       level_spec->min_altref_distance = level_stats->frames_since_last_altref;
5890     }
5891     level_stats->frames_since_last_altref = 0;
5892   } else {
5893     ++level_stats->frames_since_last_altref;
5894   }
5895
5896   if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
5897     idx = (level_stats->frame_window_buffer.start +
5898            level_stats->frame_window_buffer.len++) %
5899           FRAME_WINDOW_SIZE;
5900   } else {
5901     idx = level_stats->frame_window_buffer.start;
5902     level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
5903   }
5904   level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
5905   level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
5906   level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
5907
5908   if (cm->frame_type == KEY_FRAME) {
5909     level_stats->ref_refresh_map = 0;
5910   } else {
5911     int count = 0;
5912     level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
5913     // Also need to consider the case where the encoder refers to a buffer
5914     // that has been implicitly refreshed after encoding a keyframe.
5915     if (!cm->intra_only) {
5916       level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
5917       level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
5918       level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
5919     }
5920     for (i = 0; i < REF_FRAMES; ++i) {
5921       count += (level_stats->ref_refresh_map >> i) & 1;
5922     }
5923     if (count > level_spec->max_ref_frame_buffers) {
5924       level_spec->max_ref_frame_buffers = count;
5925     }
5926   }
5927
5928   // update average_bitrate
5929   level_spec->average_bitrate = (double)level_stats->total_compressed_size /
5930                                 125.0 / level_stats->time_encoded;
5931
5932   // update max_luma_sample_rate
5933   luma_samples = 0;
5934   for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
5935     idx = (level_stats->frame_window_buffer.start +
5936            level_stats->frame_window_buffer.len - 1 - i) %
5937           FRAME_WINDOW_SIZE;
5938     if (i == 0) {
5939       dur_end = level_stats->frame_window_buffer.buf[idx].ts;
5940     }
5941     if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
5942         TICKS_PER_SEC) {
5943       break;
5944     }
5945     luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
5946   }
5947   if (luma_samples > level_spec->max_luma_sample_rate) {
5948     level_spec->max_luma_sample_rate = luma_samples;
5949   }
5950
5951   // update max_cpb_size
5952   cpb_data_size = 0;
5953   for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
5954     if (i >= level_stats->frame_window_buffer.len) break;
5955     idx = (level_stats->frame_window_buffer.start +
5956            level_stats->frame_window_buffer.len - 1 - i) %
5957           FRAME_WINDOW_SIZE;
5958     cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5959   }
5960   cpb_data_size = cpb_data_size / 125.0;
5961   if (cpb_data_size > level_spec->max_cpb_size) {
5962     level_spec->max_cpb_size = cpb_data_size;
5963   }
5964
5965   // update max_luma_picture_size
5966   if (luma_pic_size > level_spec->max_luma_picture_size) {
5967     level_spec->max_luma_picture_size = luma_pic_size;
5968   }
5969
5970   // update max_luma_picture_breadth
5971   if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
5972     level_spec->max_luma_picture_breadth = luma_pic_breadth;
5973   }
5974
5975   // update compression_ratio
5976   level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
5977                                   cm->bit_depth /
5978                                   level_stats->total_compressed_size / 8.0;
5979
5980   // update max_col_tiles
5981   if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
5982     level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
5983   }
5984
5985   if (level_index >= 0 && level_constraint->fail_flag == 0) {
5986     if (level_spec->max_luma_picture_size >
5987         vp9_level_defs[level_index].max_luma_picture_size) {
5988       level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
5989       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5990                          "Failed to encode to the target level %d. %s",
5991                          vp9_level_defs[level_index].level,
5992                          level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
5993     }
5994
5995     if (level_spec->max_luma_picture_breadth >
5996         vp9_level_defs[level_index].max_luma_picture_breadth) {
5997       level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
5998       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5999                          "Failed to encode to the target level %d. %s",
6000                          vp9_level_defs[level_index].level,
6001                          level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
6002     }
6003
6004     if ((double)level_spec->max_luma_sample_rate >
6005         (double)vp9_level_defs[level_index].max_luma_sample_rate *
6006             (1 + SAMPLE_RATE_GRACE_P)) {
6007       level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
6008       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
6009                          "Failed to encode to the target level %d. %s",
6010                          vp9_level_defs[level_index].level,
6011                          level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
6012     }
6013
6014     if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
6015       level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
6016       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
6017                          "Failed to encode to the target level %d. %s",
6018                          vp9_level_defs[level_index].level,
6019                          level_fail_messages[TOO_MANY_COLUMN_TILE]);
6020     }
6021
6022     if (level_spec->min_altref_distance <
6023         vp9_level_defs[level_index].min_altref_distance) {
6024       level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
6025       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
6026                          "Failed to encode to the target level %d. %s",
6027                          vp9_level_defs[level_index].level,
6028                          level_fail_messages[ALTREF_DIST_TOO_SMALL]);
6029     }
6030
6031     if (level_spec->max_ref_frame_buffers >
6032         vp9_level_defs[level_index].max_ref_frame_buffers) {
6033       level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
6034       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
6035                          "Failed to encode to the target level %d. %s",
6036                          vp9_level_defs[level_index].level,
6037                          level_fail_messages[TOO_MANY_REF_BUFFER]);
6038     }
6039
6040     if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
6041       level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
6042       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
6043                          "Failed to encode to the target level %d. %s",
6044                          vp9_level_defs[level_index].level,
6045                          level_fail_messages[CPB_TOO_LARGE]);
6046     }
6047
6048     // Set an upper bound for the next frame size. It will be used in
6049     // level_rc_framerate() before encoding the next frame.
6050     cpb_data_size = 0;
6051     for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
6052       if (i >= level_stats->frame_window_buffer.len) break;
6053       idx = (level_stats->frame_window_buffer.start +
6054              level_stats->frame_window_buffer.len - 1 - i) %
6055             FRAME_WINDOW_SIZE;
6056       cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
6057     }
6058     cpb_data_size = cpb_data_size / 125.0;
6059     level_constraint->max_frame_size =
6060         (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
6061               1000.0);
6062     if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
6063       level_constraint->max_frame_size >>= 1;
6064   }
6065 }
6066
// One entry of the picture list that init_gop_frames() fills in.
6067 typedef struct GF_PICTURE {
  // Image for this entry: the golden reference buffer, cpi->Source, or a
  // lookahead entry's image, depending on the slot.
6068   YV12_BUFFER_CONFIG *frame;
  // Indices into the same picture list, stored in the order golden, last,
  // alt-ref; -1 marks a reference that is not available.
6069   int ref_frame[3];
  // Update type copied from the GF group, or LF_UPDATE for the frames
  // appended beyond the group.
6070   FRAME_UPDATE_TYPE update_type;
6071 } GF_PICTURE;
6072
6073 static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
6074                             const GF_GROUP *gf_group, int *tpl_group_frames) {
6075   VP9_COMMON *cm = &cpi->common;
6076   int frame_idx = 0;
6077   int i;
6078   int gld_index = -1;
6079   int alt_index = -1;
6080   int lst_index = -1;
6081   int arf_index_stack[MAX_ARF_LAYERS];
6082   int arf_stack_size = 0;
6083   int extend_frame_count = 0;
6084   int pframe_qindex = cpi->tpl_stats[2].base_qindex;
6085   int frame_gop_offset = 0;
6086
6087   RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
6088   int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
6089
6090   memset(recon_frame_index, -1, sizeof(recon_frame_index));
6091   stack_init(arf_index_stack, MAX_ARF_LAYERS);
6092
6093   // TODO(jingning): To be used later for gf frame type parsing.
6094   (void)gf_group;
6095
6096   for (i = 0; i < FRAME_BUFFERS; ++i) {
6097     if (frame_bufs[i].ref_count == 0) {
6098       alloc_frame_mvs(cm, i);
6099       if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
6100                                    cm->subsampling_x, cm->subsampling_y,
6101 #if CONFIG_VP9_HIGHBITDEPTH
6102                                    cm->use_highbitdepth,
6103 #endif
6104                                    VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
6105                                    NULL, NULL, NULL))
6106         vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
6107                            "Failed to allocate frame buffer");
6108
6109       recon_frame_index[frame_idx] = i;
6110       ++frame_idx;
6111
6112       if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
6113     }
6114   }
6115
6116   for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
6117     assert(recon_frame_index[i] >= 0);
6118     cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
6119   }
6120
6121   *tpl_group_frames = 0;
6122
6123   // Initialize Golden reference frame.
6124   gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
6125   for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
6126   gf_picture[0].update_type = gf_group->update_type[0];
6127   gld_index = 0;
6128   ++*tpl_group_frames;
6129
6130   // Initialize base layer ARF frame
6131   gf_picture[1].frame = cpi->Source;
6132   gf_picture[1].ref_frame[0] = gld_index;
6133   gf_picture[1].ref_frame[1] = lst_index;
6134   gf_picture[1].ref_frame[2] = alt_index;
6135   gf_picture[1].update_type = gf_group->update_type[1];
6136   alt_index = 1;
6137   ++*tpl_group_frames;
6138
6139   // Initialize P frames
6140   for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
6141     struct lookahead_entry *buf;
6142     frame_gop_offset = gf_group->frame_gop_index[frame_idx];
6143     buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
6144
6145     if (buf == NULL) break;
6146
6147     gf_picture[frame_idx].frame = &buf->img;
6148     gf_picture[frame_idx].ref_frame[0] = gld_index;
6149     gf_picture[frame_idx].ref_frame[1] = lst_index;
6150     gf_picture[frame_idx].ref_frame[2] = alt_index;
6151     gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];
6152
6153     switch (gf_group->update_type[frame_idx]) {
6154       case ARF_UPDATE:
6155         stack_push(arf_index_stack, alt_index, arf_stack_size);
6156         ++arf_stack_size;
6157         alt_index = frame_idx;
6158         break;
6159       case LF_UPDATE: lst_index = frame_idx; break;
6160       case OVERLAY_UPDATE:
6161         gld_index = frame_idx;
6162         alt_index = stack_pop(arf_index_stack, arf_stack_size);
6163         --arf_stack_size;
6164         break;
6165       case USE_BUF_FRAME:
6166         lst_index = alt_index;
6167         alt_index = stack_pop(arf_index_stack, arf_stack_size);
6168         --arf_stack_size;
6169         break;
6170       default: break;
6171     }
6172
6173     ++*tpl_group_frames;
6174
6175     // The length of group of pictures is baseline_gf_interval, plus the
6176     // beginning golden frame from last GOP, plus the last overlay frame in
6177     // the same GOP.
6178     if (frame_idx == gf_group->gf_group_size) break;
6179   }
6180
6181   alt_index = -1;
6182   ++frame_idx;
6183   ++frame_gop_offset;
6184
6185   // Extend two frames outside the current gf group.
6186   for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
6187     struct lookahead_entry *buf =
6188         vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
6189
6190     if (buf == NULL) break;
6191
6192     cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
6193
6194     gf_picture[frame_idx].frame = &buf->img;
6195     gf_picture[frame_idx].ref_frame[0] = gld_index;
6196     gf_picture[frame_idx].ref_frame[1] = lst_index;
6197     gf_picture[frame_idx].ref_frame[2] = alt_index;
6198     gf_picture[frame_idx].update_type = LF_UPDATE;
6199     lst_index = frame_idx;
6200     ++*tpl_group_frames;
6201     ++extend_frame_count;
6202     ++frame_gop_offset;
6203   }
6204 }
6205
6206 static void init_tpl_stats(VP9_COMP *cpi) {
6207   int frame_idx;
6208   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
6209     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6210     memset(tpl_frame->tpl_stats_ptr, 0,
6211            tpl_frame->height * tpl_frame->width *
6212                sizeof(*tpl_frame->tpl_stats_ptr));
6213     tpl_frame->is_valid = 0;
6214   }
6215 }
6216
6217 #if CONFIG_NON_GREEDY_MV
6218 static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
6219                                          MotionField *motion_field,
6220                                          int frame_idx, uint8_t *cur_frame_buf,
6221                                          uint8_t *ref_frame_buf, int stride,
6222                                          BLOCK_SIZE bsize, int mi_row,
6223                                          int mi_col, MV *mv) {
6224   MACROBLOCK *const x = &td->mb;
6225   MACROBLOCKD *const xd = &x->e_mbd;
6226   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6227   int step_param;
6228   uint32_t bestsme = UINT_MAX;
6229   const MvLimits tmp_mv_limits = x->mv_limits;
6230   // lambda is used to adjust the importance of motion vector consistency.
6231   // TODO(angiebird): Figure out lambda's proper value.
6232   const int lambda = cpi->tpl_stats[frame_idx].lambda;
6233   int_mv nb_full_mvs[NB_MVS_NUM];
6234   int nb_full_mv_num;
6235
6236   MV best_ref_mv1 = { 0, 0 };
6237   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6238
6239   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6240   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6241
6242   // Setup frame pointers
6243   x->plane[0].src.buf = cur_frame_buf;
6244   x->plane[0].src.stride = stride;
6245   xd->plane[0].pre[0].buf = ref_frame_buf;
6246   xd->plane[0].pre[0].stride = stride;
6247
6248   step_param = mv_sf->reduce_first_step_size;
6249   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6250
6251   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6252
6253   nb_full_mv_num =
6254       vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
6255   vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
6256                              lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
6257
6258   /* restore UMV window */
6259   x->mv_limits = tmp_mv_limits;
6260
6261   return bestsme;
6262 }
6263
6264 static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
6265                                         uint8_t *cur_frame_buf,
6266                                         uint8_t *ref_frame_buf, int stride,
6267                                         BLOCK_SIZE bsize, MV *mv) {
6268   MACROBLOCK *const x = &td->mb;
6269   MACROBLOCKD *const xd = &x->e_mbd;
6270   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6271   uint32_t bestsme = UINT_MAX;
6272   uint32_t distortion;
6273   uint32_t sse;
6274   int cost_list[5];
6275
6276   MV best_ref_mv1 = { 0, 0 };
6277
6278   // Setup frame pointers
6279   x->plane[0].src.buf = cur_frame_buf;
6280   x->plane[0].src.stride = stride;
6281   xd->plane[0].pre[0].buf = ref_frame_buf;
6282   xd->plane[0].pre[0].stride = stride;
6283
6284   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6285   // Ignore mv costing by sending NULL pointer instead of cost array
6286   bestsme = cpi->find_fractional_mv_step(
6287       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6288       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6289       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6290       USE_2_TAPS);
6291
6292   return bestsme;
6293 }
6294
6295 #else  // CONFIG_NON_GREEDY_MV
6296 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
6297                                               uint8_t *cur_frame_buf,
6298                                               uint8_t *ref_frame_buf,
6299                                               int stride, BLOCK_SIZE bsize,
6300                                               MV *mv) {
6301   MACROBLOCK *const x = &td->mb;
6302   MACROBLOCKD *const xd = &x->e_mbd;
6303   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6304   const SEARCH_METHODS search_method = NSTEP;
6305   int step_param;
6306   int sadpb = x->sadperbit16;
6307   uint32_t bestsme = UINT_MAX;
6308   uint32_t distortion;
6309   uint32_t sse;
6310   int cost_list[5];
6311   const MvLimits tmp_mv_limits = x->mv_limits;
6312
6313   MV best_ref_mv1 = { 0, 0 };
6314   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6315
6316   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6317   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6318
6319   // Setup frame pointers
6320   x->plane[0].src.buf = cur_frame_buf;
6321   x->plane[0].src.stride = stride;
6322   xd->plane[0].pre[0].buf = ref_frame_buf;
6323   xd->plane[0].pre[0].stride = stride;
6324
6325   step_param = mv_sf->reduce_first_step_size;
6326   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6327
6328   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6329
6330   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
6331                         search_method, sadpb, cond_cost_list(cpi, cost_list),
6332                         &best_ref_mv1, mv, 0, 0);
6333
6334   /* restore UMV window */
6335   x->mv_limits = tmp_mv_limits;
6336
6337   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6338   // Ignore mv costing by sending NULL pointer instead of cost array
6339   bestsme = cpi->find_fractional_mv_step(
6340       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6341       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6342       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6343       USE_2_TAPS);
6344
6345   return bestsme;
6346 }
6347 #endif
6348
6349 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
6350                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
6351   int width = 0, height = 0;
6352   int bw = 4 << b_width_log2_lookup[bsize];
6353   int bh = 4 << b_height_log2_lookup[bsize];
6354
6355   switch (block) {
6356     case 0:
6357       width = grid_pos_col + bw - ref_pos_col;
6358       height = grid_pos_row + bh - ref_pos_row;
6359       break;
6360     case 1:
6361       width = ref_pos_col + bw - grid_pos_col;
6362       height = grid_pos_row + bh - ref_pos_row;
6363       break;
6364     case 2:
6365       width = grid_pos_col + bw - ref_pos_col;
6366       height = ref_pos_row + bh - grid_pos_row;
6367       break;
6368     case 3:
6369       width = ref_pos_col + bw - grid_pos_col;
6370       height = ref_pos_row + bh - grid_pos_row;
6371       break;
6372     default: assert(0);
6373   }
6374
6375   return width * height;
6376 }
6377
// Floor division of a pixel position by a block dimension: for a positive
// bsize_pix, returns the index of the bsize_pix-wide grid cell containing
// ref_pos, so positions before the origin map to negative cell indices.
static int round_floor(int ref_pos, int bsize_pix) {
  if (ref_pos >= 0) return ref_pos / bsize_pix;

  // Integer division truncates toward zero, so negative positions are
  // handled by dividing the mirrored offset and negating the result.
  return -(1 + (-ref_pos - 1) / bsize_pix);
}
6387
6388 static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
6389                             BLOCK_SIZE bsize, int stride) {
6390   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6391   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6392   const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
6393   int idx, idy;
6394
6395   for (idy = 0; idy < mi_height; ++idy) {
6396     for (idx = 0; idx < mi_width; ++idx) {
6397       TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
6398       const int64_t mc_flow = tpl_ptr->mc_flow;
6399       const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
6400       *tpl_ptr = *src_stats;
6401       tpl_ptr->mc_flow = mc_flow;
6402       tpl_ptr->mc_ref_cost = mc_ref_cost;
6403       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
6404     }
6405   }
6406 }
6407
6408 static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6409                                int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6410   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
6411   TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
6412   MV mv = tpl_stats->mv.as_mv;
6413   int mv_row = mv.row >> 3;
6414   int mv_col = mv.col >> 3;
6415
6416   int ref_pos_row = mi_row * MI_SIZE + mv_row;
6417   int ref_pos_col = mi_col * MI_SIZE + mv_col;
6418
6419   const int bw = 4 << b_width_log2_lookup[bsize];
6420   const int bh = 4 << b_height_log2_lookup[bsize];
6421   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6422   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6423   const int pix_num = bw * bh;
6424
6425   // top-left on grid block location in pixel
6426   int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
6427   int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
6428   int block;
6429
6430   for (block = 0; block < 4; ++block) {
6431     int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
6432     int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
6433
6434     if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
6435         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
6436       int overlap_area = get_overlap_area(
6437           grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
6438       int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
6439       int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
6440
6441       int64_t mc_flow = tpl_stats->mc_dep_cost -
6442                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
6443                             tpl_stats->intra_cost;
6444
6445       int idx, idy;
6446
6447       for (idy = 0; idy < mi_height; ++idy) {
6448         for (idx = 0; idx < mi_width; ++idx) {
6449           TplDepStats *des_stats =
6450               &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
6451                          (ref_mi_col + idx)];
6452
6453           des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
6454           des_stats->mc_ref_cost +=
6455               ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
6456               pix_num;
6457           assert(overlap_area >= 0);
6458         }
6459       }
6460     }
6461   }
6462 }
6463
6464 static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6465                              int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6466   int idx, idy;
6467   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6468   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6469
6470   for (idy = 0; idy < mi_height; ++idy) {
6471     for (idx = 0; idx < mi_width; ++idx) {
6472       TplDepStats *tpl_ptr =
6473           &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
6474       tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
6475                          BLOCK_8X8);
6476     }
6477   }
6478 }
6479
6480 static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
6481                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
6482                                TX_SIZE tx_size, int64_t *recon_error,
6483                                int64_t *sse) {
6484   MACROBLOCKD *const xd = &x->e_mbd;
6485   const struct macroblock_plane *const p = &x->plane[plane];
6486   const struct macroblockd_plane *const pd = &xd->plane[plane];
6487   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
6488   uint16_t eob;
6489   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
6490   const int shift = tx_size == TX_32X32 ? 0 : 2;
6491
6492 #if CONFIG_VP9_HIGHBITDEPTH
6493   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6494     vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6495                                  p->quant_fp, qcoeff, dqcoeff, pd->dequant,
6496                                  &eob, scan_order->scan, scan_order->iscan);
6497   } else {
6498     vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6499                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
6500                           scan_order->scan, scan_order->iscan);
6501   }
6502 #else
6503   vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
6504                         qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
6505                         scan_order->iscan);
6506 #endif  // CONFIG_VP9_HIGHBITDEPTH
6507
6508   *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
6509   *recon_error = VPXMAX(*recon_error, 1);
6510
6511   *sse = (*sse) >> shift;
6512   *sse = VPXMAX(*sse, 1);
6513 }
6514
6515 #if CONFIG_VP9_HIGHBITDEPTH
6516 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6517                          TX_SIZE tx_size) {
6518   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
6519   switch (tx_size) {
6520     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
6521     case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
6522     case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
6523     default: assert(0);
6524   }
6525 }
6526 #endif  // CONFIG_VP9_HIGHBITDEPTH
6527
6528 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6529                   TX_SIZE tx_size) {
6530   switch (tx_size) {
6531     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
6532     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
6533     case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
6534     default: assert(0);
6535   }
6536 }
6537
6538 static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
6539                           int mi_col) {
6540   x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6541   x->mv_limits.row_max =
6542       (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
6543   x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6544   x->mv_limits.col_max =
6545       ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
6546 }
6547
6548 static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
6549                             struct scale_factors *sf, GF_PICTURE *gf_picture,
6550                             int frame_idx, TplDepFrame *tpl_frame,
6551                             int16_t *src_diff, tran_low_t *coeff,
6552                             tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
6553                             int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
6554                             YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
6555                             int64_t *recon_error, int64_t *sse) {
6556   VP9_COMMON *cm = &cpi->common;
6557   ThreadData *td = &cpi->td;
6558
6559   const int bw = 4 << b_width_log2_lookup[bsize];
6560   const int bh = 4 << b_height_log2_lookup[bsize];
6561   const int pix_num = bw * bh;
6562   int best_rf_idx = -1;
6563   int_mv best_mv;
6564   int64_t best_inter_cost = INT64_MAX;
6565   int64_t inter_cost;
6566   int rf_idx;
6567   const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
6568
6569   int64_t best_intra_cost = INT64_MAX;
6570   int64_t intra_cost;
6571   PREDICTION_MODE mode;
6572   int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6573   MODE_INFO mi_above, mi_left;
6574   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6575   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6576   TplDepStats *tpl_stats =
6577       &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6578
6579   xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
6580   xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
6581   xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
6582   xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
6583   xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
6584   xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
6585
6586   // Intra prediction search
6587   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
6588     uint8_t *src, *dst;
6589     int src_stride, dst_stride;
6590
6591     src = xd->cur_buf->y_buffer + mb_y_offset;
6592     src_stride = xd->cur_buf->y_stride;
6593
6594     dst = &predictor[0];
6595     dst_stride = bw;
6596
6597     xd->mi[0]->sb_type = bsize;
6598     xd->mi[0]->ref_frame[0] = INTRA_FRAME;
6599
6600     vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
6601                             src_stride, dst, dst_stride, 0, 0, 0);
6602
6603 #if CONFIG_VP9_HIGHBITDEPTH
6604     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6605       vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6606                                 dst_stride, xd->bd);
6607       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6608       intra_cost = vpx_highbd_satd(coeff, pix_num);
6609     } else {
6610       vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6611                          dst_stride);
6612       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6613       intra_cost = vpx_satd(coeff, pix_num);
6614     }
6615 #else
6616     vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
6617     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6618     intra_cost = vpx_satd(coeff, pix_num);
6619 #endif  // CONFIG_VP9_HIGHBITDEPTH
6620
6621     if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
6622   }
6623
6624   // Motion compensated prediction
6625   best_mv.as_int = 0;
6626
6627   set_mv_limits(cm, x, mi_row, mi_col);
6628
6629   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6630     int_mv mv;
6631 #if CONFIG_NON_GREEDY_MV
6632     MotionField *motion_field;
6633 #endif
6634     if (ref_frame[rf_idx] == NULL) continue;
6635
6636 #if CONFIG_NON_GREEDY_MV
6637     (void)td;
6638     motion_field = vp9_motion_field_info_get_motion_field(
6639         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6640     mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6641 #else
6642     motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
6643                                   ref_frame[rf_idx]->y_buffer + mb_y_offset,
6644                                   xd->cur_buf->y_stride, bsize, &mv.as_mv);
6645 #endif
6646
6647 #if CONFIG_VP9_HIGHBITDEPTH
6648     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6649       vp9_highbd_build_inter_predictor(
6650           CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
6651           ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
6652           &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
6653           mi_row * MI_SIZE, xd->bd);
6654       vpx_highbd_subtract_block(
6655           bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
6656           xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
6657       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6658       inter_cost = vpx_highbd_satd(coeff, pix_num);
6659     } else {
6660       vp9_build_inter_predictor(
6661           ref_frame[rf_idx]->y_buffer + mb_y_offset,
6662           ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
6663           0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
6664       vpx_subtract_block(bh, bw, src_diff, bw,
6665                          xd->cur_buf->y_buffer + mb_y_offset,
6666                          xd->cur_buf->y_stride, &predictor[0], bw);
6667       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6668       inter_cost = vpx_satd(coeff, pix_num);
6669     }
6670 #else
6671     vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
6672                               ref_frame[rf_idx]->y_stride, &predictor[0], bw,
6673                               &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
6674                               mi_col * MI_SIZE, mi_row * MI_SIZE);
6675     vpx_subtract_block(bh, bw, src_diff, bw,
6676                        xd->cur_buf->y_buffer + mb_y_offset,
6677                        xd->cur_buf->y_stride, &predictor[0], bw);
6678     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6679     inter_cost = vpx_satd(coeff, pix_num);
6680 #endif
6681
6682     if (inter_cost < best_inter_cost) {
6683       best_rf_idx = rf_idx;
6684       best_inter_cost = inter_cost;
6685       best_mv.as_int = mv.as_int;
6686       get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
6687                          sse);
6688     }
6689   }
6690   best_intra_cost = VPXMAX(best_intra_cost, 1);
6691   best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
6692   tpl_stats->inter_cost = VPXMAX(
6693       1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6694   tpl_stats->intra_cost = VPXMAX(
6695       1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6696   tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
6697   tpl_stats->mv.as_int = best_mv.as_int;
6698 }
6699
6700 #if CONFIG_NON_GREEDY_MV
6701 static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
6702                                   int frame_idx, int rf_idx, int mi_row,
6703                                   int mi_col, struct buf_2d *src,
6704                                   struct buf_2d *pre) {
6705   const int mb_y_offset =
6706       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6707   YV12_BUFFER_CONFIG *ref_frame = NULL;
6708   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6709   if (ref_frame_idx != -1) {
6710     ref_frame = gf_picture[ref_frame_idx].frame;
6711     src->buf = xd->cur_buf->y_buffer + mb_y_offset;
6712     src->stride = xd->cur_buf->y_stride;
6713     pre->buf = ref_frame->y_buffer + mb_y_offset;
6714     pre->stride = ref_frame->y_stride;
6715     assert(src->stride == pre->stride);
6716     return 1;
6717   } else {
6718     printf("invalid ref_frame_idx");
6719     assert(ref_frame_idx != -1);
6720     return 0;
6721   }
6722 }
6723
// Parameters of the diagonal pre-check scan in predict_mv_mode():
// kMvPreCheckLines is the number of diagonals visited and kMvPreCheckSize the
// total number of blocks on them, i.e.
// kMvPreCheckLines * (kMvPreCheckLines + 1) / 2.
#define kMvPreCheckLines 5
#define kMvPreCheckSize 15

// Neighbor offsets examined by find_ref_mv(), in units of the block size.
// Every component is non-positive (find_ref_mv() asserts this).
// NOTE(review): presumably each entry is { row, col }, i.e. above, left and
// above-left -- confirm against the POSITION definition.
#define MV_REF_POS_NUM 3
POSITION mv_ref_pos[MV_REF_POS_NUM] = {
  { -1, 0 },
  { 0, -1 },
  { -1, -1 },
};
6733
6734 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
6735                              int mi_col) {
6736   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
6737 }
6738
6739 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
6740                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
6741   int i;
6742   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6743   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6744   int_mv nearest_mv, near_mv, invalid_mv;
6745   nearest_mv.as_int = INVALID_MV;
6746   near_mv.as_int = INVALID_MV;
6747   invalid_mv.as_int = INVALID_MV;
6748   for (i = 0; i < MV_REF_POS_NUM; ++i) {
6749     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
6750     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
6751     assert(mv_ref_pos[i].row <= 0);
6752     assert(mv_ref_pos[i].col <= 0);
6753     if (nb_row >= 0 && nb_col >= 0) {
6754       if (nearest_mv.as_int == INVALID_MV) {
6755         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6756       } else {
6757         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6758         if (mv.as_int == nearest_mv.as_int) {
6759           continue;
6760         } else {
6761           near_mv = mv;
6762           break;
6763         }
6764       }
6765     }
6766   }
6767   if (nearest_mv.as_int == INVALID_MV) {
6768     nearest_mv.as_mv.row = 0;
6769     nearest_mv.as_mv.col = 0;
6770   }
6771   if (near_mv.as_int == INVALID_MV) {
6772     near_mv.as_mv.row = 0;
6773     near_mv.as_mv.col = 0;
6774   }
6775   if (mv_mode == NEAREST_MV_MODE) {
6776     return nearest_mv;
6777   }
6778   if (mv_mode == NEAR_MV_MODE) {
6779     return near_mv;
6780   }
6781   assert(0);
6782   return invalid_mv;
6783 }
6784
6785 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
6786                                   MotionField *motion_field,
6787                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
6788                                   int mi_row, int mi_col) {
6789   int_mv mv;
6790   switch (mv_mode) {
6791     case ZERO_MV_MODE:
6792       mv.as_mv.row = 0;
6793       mv.as_mv.col = 0;
6794       break;
6795     case NEW_MV_MODE:
6796       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6797       break;
6798     case NEAREST_MV_MODE:
6799       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6800       break;
6801     case NEAR_MV_MODE:
6802       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6803       break;
6804     default:
6805       mv.as_int = INVALID_MV;
6806       assert(0);
6807       break;
6808   }
6809   return mv;
6810 }
6811
6812 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
6813                           GF_PICTURE *gf_picture, MotionField *motion_field,
6814                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6815                           BLOCK_SIZE bsize, int mi_row, int mi_col,
6816                           int_mv *mv) {
6817   uint32_t sse;
6818   struct buf_2d src;
6819   struct buf_2d pre;
6820   MV full_mv;
6821   *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
6822                             mi_row, mi_col);
6823   full_mv = get_full_mv(&mv->as_mv);
6824   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
6825                              &src, &pre)) {
6826     // TODO(angiebird): Consider subpixel when computing the sse.
6827     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
6828                           pre.stride, &sse);
6829     return (double)(sse << VP9_DIST_SCALE_LOG2);
6830   } else {
6831     assert(0);
6832     return 0;
6833   }
6834 }
6835
6836 static int get_mv_mode_cost(int mv_mode) {
6837   // TODO(angiebird): The probabilities are roughly inferred from
6838   // default_inter_mode_probs. Check if there is a better way to set the
6839   // probabilities.
6840   const int zero_mv_prob = 16;
6841   const int new_mv_prob = 24 * 1;
6842   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
6843   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
6844   switch (mv_mode) {
6845     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
6846     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
6847     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6848     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6849     default: assert(0); return -1;
6850   }
6851 }
6852
6853 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
6854   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
6855                         log2(1 + abs(new_mv->col - ref_mv->col));
6856   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
6857   return mv_diff_cost;
6858 }
6859 static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
6860                           TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
6861                           int mi_col) {
6862   double mv_cost = get_mv_mode_cost(mv_mode);
6863   if (mv_mode == NEW_MV_MODE) {
6864     MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
6865                                     bsize, mi_row, mi_col)
6866                     .as_mv;
6867     MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
6868                                         tpl_frame, bsize, mi_row, mi_col)
6869                         .as_mv;
6870     MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
6871                                      bsize, mi_row, mi_col)
6872                      .as_mv;
6873     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
6874     double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
6875     mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
6876   }
6877   return mv_cost;
6878 }
6879
6880 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
6881                            GF_PICTURE *gf_picture, MotionField *motion_field,
6882                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6883                            BLOCK_SIZE bsize, int mi_row, int mi_col,
6884                            int_mv *mv) {
6885   MACROBLOCKD *xd = &x->e_mbd;
6886   double mv_dist =
6887       get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
6888                   tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
6889   double mv_cost =
6890       get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
6891   double mult = 180;
6892
6893   return mv_cost + mult * log2f(1 + mv_dist);
6894 }
6895
6896 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6897                                  GF_PICTURE *gf_picture,
6898                                  MotionField *motion_field, int frame_idx,
6899                                  TplDepFrame *tpl_frame, int rf_idx,
6900                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
6901                                  double *rd, int_mv *mv) {
6902   int best_mv_mode = ZERO_MV_MODE;
6903   int update = 0;
6904   int mv_mode;
6905   *rd = 0;
6906   for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
6907     double this_rd;
6908     int_mv this_mv;
6909     if (mv_mode == NEW_MV_MODE) {
6910       continue;
6911     }
6912     this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
6913                            tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
6914     if (update == 0) {
6915       *rd = this_rd;
6916       *mv = this_mv;
6917       best_mv_mode = mv_mode;
6918       update = 1;
6919     } else {
6920       if (this_rd < *rd) {
6921         *rd = this_rd;
6922         *mv = this_mv;
6923         best_mv_mode = mv_mode;
6924       }
6925     }
6926   }
6927   return best_mv_mode;
6928 }
6929
6930 static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6931                             GF_PICTURE *gf_picture, MotionField *motion_field,
6932                             int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6933                             BLOCK_SIZE bsize, int mi_row, int mi_col) {
6934   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6935   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6936   int tmp_mv_mode_arr[kMvPreCheckSize];
6937   int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
6938   double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
6939   int_mv *select_mv_arr = cpi->select_mv_arr;
6940   int_mv tmp_select_mv_arr[kMvPreCheckSize];
6941   int stride = tpl_frame->stride;
6942   double new_mv_rd = 0;
6943   double no_new_mv_rd = 0;
6944   double this_new_mv_rd = 0;
6945   double this_no_new_mv_rd = 0;
6946   int idx;
6947   int tmp_idx;
6948   assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);
6949
6950   // no new mv
6951   // diagonal scan order
6952   tmp_idx = 0;
6953   for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6954     int r;
6955     for (r = 0; r <= idx; ++r) {
6956       int c = idx - r;
6957       int nb_row = mi_row + r * mi_height;
6958       int nb_col = mi_col + c * mi_width;
6959       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6960         double this_rd;
6961         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6962         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6963             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6964             bsize, nb_row, nb_col, &this_rd, mv);
6965         if (r == 0 && c == 0) {
6966           this_no_new_mv_rd = this_rd;
6967         }
6968         no_new_mv_rd += this_rd;
6969         tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
6970         tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
6971         ++tmp_idx;
6972       }
6973     }
6974   }
6975
6976   // new mv
6977   mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
6978   this_new_mv_rd = eval_mv_mode(
6979       NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6980       rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
6981   new_mv_rd = this_new_mv_rd;
6982   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
6983   // beforehand.
6984   for (idx = 1; idx < kMvPreCheckLines; ++idx) {
6985     int r;
6986     for (r = 0; r <= idx; ++r) {
6987       int c = idx - r;
6988       int nb_row = mi_row + r * mi_height;
6989       int nb_col = mi_col + c * mi_width;
6990       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6991         double this_rd;
6992         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6993         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6994             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6995             bsize, nb_row, nb_col, &this_rd, mv);
6996         new_mv_rd += this_rd;
6997       }
6998     }
6999   }
7000
7001   // update best_mv_mode
7002   tmp_idx = 0;
7003   if (no_new_mv_rd < new_mv_rd) {
7004     for (idx = 0; idx < kMvPreCheckLines; ++idx) {
7005       int r;
7006       for (r = 0; r <= idx; ++r) {
7007         int c = idx - r;
7008         int nb_row = mi_row + r * mi_height;
7009         int nb_col = mi_col + c * mi_width;
7010         if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
7011           mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
7012           select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
7013           ++tmp_idx;
7014         }
7015       }
7016     }
7017     rd_diff_arr[mi_row * stride + mi_col] = 0;
7018   } else {
7019     rd_diff_arr[mi_row * stride + mi_col] =
7020         (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
7021   }
7022 }
7023
7024 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
7025                                 GF_PICTURE *gf_picture,
7026                                 MotionField *motion_field, int frame_idx,
7027                                 TplDepFrame *tpl_frame, int rf_idx,
7028                                 BLOCK_SIZE bsize) {
7029   const int mi_height = num_8x8_blocks_high_lookup[bsize];
7030   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7031   const int unit_rows = tpl_frame->mi_rows / mi_height;
7032   const int unit_cols = tpl_frame->mi_cols / mi_width;
7033   const int max_diagonal_lines = unit_rows + unit_cols - 1;
7034   int idx;
7035   for (idx = 0; idx < max_diagonal_lines; ++idx) {
7036     int r;
7037     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
7038          ++r) {
7039       int c = idx - r;
7040       int mi_row = r * mi_height;
7041       int mi_col = c * mi_width;
7042       assert(c >= 0 && c < unit_cols);
7043       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
7044       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
7045       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
7046                       rf_idx, bsize, mi_row, mi_col);
7047     }
7048   }
7049 }
7050
7051 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
7052                              MotionField *motion_field, int frame_idx,
7053                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
7054                              int mi_row, int mi_col) {
7055   VP9_COMMON *cm = &cpi->common;
7056   MACROBLOCK *x = &td->mb;
7057   MACROBLOCKD *xd = &x->e_mbd;
7058   const int mb_y_offset =
7059       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
7060   assert(ref_frame != NULL);
7061   set_mv_limits(cm, x, mi_row, mi_col);
7062   {
7063     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
7064     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
7065     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
7066     const int stride = xd->cur_buf->y_stride;
7067     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
7068                              ref_frame_buf, stride, bsize, mi_row, mi_col,
7069                              &mv.as_mv);
7070     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
7071                             bsize, &mv.as_mv);
7072     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
7073   }
7074 }
7075
7076 static void build_motion_field(
7077     VP9_COMP *cpi, int frame_idx,
7078     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
7079   VP9_COMMON *cm = &cpi->common;
7080   ThreadData *td = &cpi->td;
7081   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7082   const int mi_height = num_8x8_blocks_high_lookup[bsize];
7083   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7084   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
7085   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
7086   int mi_row, mi_col;
7087   int rf_idx;
7088
7089   tpl_frame->lambda = (pw * ph) >> 2;
7090   assert(pw * ph == tpl_frame->lambda << 2);
7091
7092   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7093     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
7094         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
7095     if (ref_frame[rf_idx] == NULL) {
7096       continue;
7097     }
7098     vp9_motion_field_reset_mvs(motion_field);
7099     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7100       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7101         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
7102                          bsize, mi_row, mi_col);
7103       }
7104     }
7105   }
7106 }
7107 #endif  // CONFIG_NON_GREEDY_MV
7108
7109 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
7110                               int frame_idx, BLOCK_SIZE bsize) {
7111   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7112   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
7113   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
7114
7115   VP9_COMMON *cm = &cpi->common;
7116   struct scale_factors sf;
7117   int rdmult, idx;
7118   ThreadData *td = &cpi->td;
7119   MACROBLOCK *x = &td->mb;
7120   MACROBLOCKD *xd = &x->e_mbd;
7121   int mi_row, mi_col;
7122
7123 #if CONFIG_VP9_HIGHBITDEPTH
7124   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
7125   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
7126   uint8_t *predictor;
7127 #else
7128   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
7129 #endif
7130   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
7131   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
7132   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
7133   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
7134
7135   const TX_SIZE tx_size = max_txsize_lookup[bsize];
7136   const int mi_height = num_8x8_blocks_high_lookup[bsize];
7137   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7138   int64_t recon_error, sse;
7139 #if CONFIG_NON_GREEDY_MV
7140   int square_block_idx;
7141   int rf_idx;
7142 #endif
7143
7144   // Setup scaling factor
7145 #if CONFIG_VP9_HIGHBITDEPTH
7146   vp9_setup_scale_factors_for_frame(
7147       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
7148       this_frame->y_crop_width, this_frame->y_crop_height,
7149       cpi->common.use_highbitdepth);
7150
7151   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
7152     predictor = CONVERT_TO_BYTEPTR(predictor16);
7153   else
7154     predictor = predictor8;
7155 #else
7156   vp9_setup_scale_factors_for_frame(
7157       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
7158       this_frame->y_crop_width, this_frame->y_crop_height);
7159 #endif  // CONFIG_VP9_HIGHBITDEPTH
7160
7161   // Prepare reference frame pointers. If any reference frame slot is
7162   // unavailable, the pointer will be set to Null.
7163   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
7164     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
7165     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
7166   }
7167
7168   xd->mi = cm->mi_grid_visible;
7169   xd->mi[0] = cm->mi;
7170   xd->cur_buf = this_frame;
7171
7172   // Get rd multiplier set up.
7173   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
7174   set_error_per_bit(&cpi->td.mb, rdmult);
7175   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
7176
7177   tpl_frame->is_valid = 1;
7178
7179   cm->base_qindex = tpl_frame->base_qindex;
7180   vp9_frame_init_quantizer(cpi);
7181
7182 #if CONFIG_NON_GREEDY_MV
7183   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
7184        ++square_block_idx) {
7185     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
7186     build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
7187   }
7188   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7189     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
7190     if (ref_frame_idx != -1) {
7191       MotionField *motion_field = vp9_motion_field_info_get_motion_field(
7192           &cpi->motion_field_info, frame_idx, rf_idx, bsize);
7193       predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
7194                           tpl_frame, rf_idx, bsize);
7195     }
7196   }
7197 #endif
7198
7199   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7200     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7201       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
7202                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
7203                       tx_size, ref_frame, predictor, &recon_error, &sse);
7204       // Motion flow dependency dispenser.
7205       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
7206                       tpl_frame->stride);
7207
7208       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
7209                        bsize);
7210     }
7211   }
7212 }
7213
7214 #if CONFIG_NON_GREEDY_MV
7215 #define DUMP_TPL_STATS 0
7216 #if DUMP_TPL_STATS
// Prints a h x w window of buf to stdout. The first output line is "h w";
// it is followed by the window's samples in row-major order, each printed as
// a decimal followed by a space, all on one line that ends with a newline.
// (row, col) is the window's top-left sample and stride is the distance
// between vertically adjacent samples of buf.
static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
  int y;
  printf("%d %d\n", h, w);
  for (y = 0; y < h; ++y) {
    const int line_start = (row + y) * stride + col;
    int xpos;
    for (xpos = 0; xpos < w; ++xpos) {
      printf("%d ", buf[line_start + xpos]);
    }
  }
  printf("\n");
}
7227
7228 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
7229   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
7230            frame_buf->y_width);
7231   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
7232            frame_buf->uv_height, frame_buf->uv_width);
7233   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
7234            frame_buf->uv_height, frame_buf->uv_width);
7235 }
7236
7237 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
7238                            const GF_GROUP *gf_group,
7239                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
7240   int frame_idx;
7241   const VP9_COMMON *cm = &cpi->common;
7242   int rf_idx;
7243   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
7244     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7245       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7246       int mi_row, mi_col;
7247       int ref_frame_idx;
7248       const int mi_height = num_8x8_blocks_high_lookup[bsize];
7249       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7250       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
7251       if (ref_frame_idx != -1) {
7252         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
7253         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
7254         const int ref_gf_frame_offset =
7255             gf_group->frame_gop_index[ref_frame_idx];
7256         printf("=\n");
7257         printf(
7258             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
7259             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
7260             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
7261             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
7262         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7263           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7264             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7265               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
7266                                                        frame_idx, rf_idx, bsize,
7267                                                        mi_row, mi_col);
7268               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
7269                      mv.as_mv.col);
7270             }
7271           }
7272         }
7273         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7274           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7275             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7276               const TplDepStats *tpl_ptr =
7277                   &tpl_frame
7278                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
7279               printf("%f ", tpl_ptr->feature_score);
7280             }
7281           }
7282         }
7283         printf("\n");
7284
7285         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7286           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7287             const int mv_mode =
7288                 tpl_frame
7289                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
7290             printf("%d ", mv_mode);
7291           }
7292         }
7293         printf("\n");
7294
7295         dump_frame_buf(gf_picture[frame_idx].frame);
7296         dump_frame_buf(ref_frame_buf);
7297       }
7298     }
7299   }
7300 }
7301 #endif  // DUMP_TPL_STATS
7302 #endif  // CONFIG_NON_GREEDY_MV
7303
7304 static void init_tpl_buffer(VP9_COMP *cpi) {
7305   VP9_COMMON *cm = &cpi->common;
7306   int frame;
7307
7308   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7309   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7310 #if CONFIG_NON_GREEDY_MV
7311   int rf_idx;
7312
7313   vpx_free(cpi->select_mv_arr);
7314   CHECK_MEM_ERROR(
7315       cm, cpi->select_mv_arr,
7316       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
7317 #endif
7318
7319   // TODO(jingning): Reduce the actual memory use for tpl model build up.
7320   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7321     if (cpi->tpl_stats[frame].width >= mi_cols &&
7322         cpi->tpl_stats[frame].height >= mi_rows &&
7323         cpi->tpl_stats[frame].tpl_stats_ptr)
7324       continue;
7325
7326 #if CONFIG_NON_GREEDY_MV
7327     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7328       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7329       CHECK_MEM_ERROR(
7330           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
7331           vpx_calloc(mi_rows * mi_cols * 4,
7332                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
7333       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7334       CHECK_MEM_ERROR(
7335           cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
7336           vpx_calloc(mi_rows * mi_cols * 4,
7337                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
7338     }
7339 #endif
7340     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7341     CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
7342                     vpx_calloc(mi_rows * mi_cols,
7343                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
7344     cpi->tpl_stats[frame].is_valid = 0;
7345     cpi->tpl_stats[frame].width = mi_cols;
7346     cpi->tpl_stats[frame].height = mi_rows;
7347     cpi->tpl_stats[frame].stride = mi_cols;
7348     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
7349     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
7350   }
7351
7352   for (frame = 0; frame < REF_FRAMES; ++frame) {
7353     cpi->enc_frame_buf[frame].mem_valid = 0;
7354     cpi->enc_frame_buf[frame].released = 1;
7355   }
7356 }
7357
7358 static void free_tpl_buffer(VP9_COMP *cpi) {
7359   int frame;
7360 #if CONFIG_NON_GREEDY_MV
7361   vp9_free_motion_field_info(&cpi->motion_field_info);
7362   vpx_free(cpi->select_mv_arr);
7363 #endif
7364   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7365 #if CONFIG_NON_GREEDY_MV
7366     int rf_idx;
7367     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7368       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7369       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7370     }
7371 #endif
7372     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7373     cpi->tpl_stats[frame].is_valid = 0;
7374   }
7375 }
7376
7377 #if CONFIG_RATE_CTRL
7378 static void accumulate_frame_tpl_stats(VP9_COMP *cpi) {
7379   VP9_COMMON *const cm = &cpi->common;
7380   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7381   int show_frame_count = 0;
7382   int frame_idx;
7383   // Accumulate tpl stats for each frame in the current group of picture.
7384   for (frame_idx = 1; frame_idx < gf_group->gf_group_size; ++frame_idx) {
7385     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7386     if (!tpl_frame->is_valid) continue;
7387
7388     TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
7389     const int tpl_stride = tpl_frame->stride;
7390     int64_t intra_cost_base = 0;
7391     int64_t inter_cost_base = 0;
7392     int64_t mc_dep_cost_base = 0;
7393     int64_t mc_ref_cost_base = 0;
7394     int64_t mc_flow_base = 0;
7395     int row, col;
7396
7397     for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
7398       for (col = 0; col < cm->mi_cols; ++col) {
7399         TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
7400         intra_cost_base += this_stats->intra_cost;
7401         inter_cost_base += this_stats->inter_cost;
7402         mc_dep_cost_base += this_stats->mc_dep_cost;
7403         mc_ref_cost_base += this_stats->mc_ref_cost;
7404         mc_flow_base += this_stats->mc_flow;
7405       }
7406     }
7407
7408     cpi->tpl_stats_info[show_frame_count].intra_cost = intra_cost_base;
7409     cpi->tpl_stats_info[show_frame_count].inter_cost = inter_cost_base;
7410     cpi->tpl_stats_info[show_frame_count].mc_dep_cost = mc_dep_cost_base;
7411     cpi->tpl_stats_info[show_frame_count].mc_ref_cost = mc_ref_cost_base;
7412     cpi->tpl_stats_info[show_frame_count].mc_flow = mc_flow_base;
7413
7414     ++show_frame_count;
7415   }
7416 }
7417 #endif  // CONFIG_RATE_CTRL
7418
7419 static void setup_tpl_stats(VP9_COMP *cpi) {
7420   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
7421   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7422   int tpl_group_frames = 0;
7423   int frame_idx;
7424   cpi->tpl_bsize = BLOCK_32X32;
7425
7426   init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
7427
7428   init_tpl_stats(cpi);
7429
7430   // Backward propagation from tpl_group_frames to 1.
7431   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
7432     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
7433     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
7434   }
7435 #if CONFIG_NON_GREEDY_MV
7436   cpi->tpl_ready = 1;
7437 #if DUMP_TPL_STATS
7438   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
7439 #endif  // DUMP_TPL_STATS
7440 #endif  // CONFIG_NON_GREEDY_MV
7441
7442 #if CONFIG_RATE_CTRL
7443   accumulate_frame_tpl_stats(cpi);
7444 #endif  // CONFIG_RATE_CTRL
7445 }
7446
7447 void vp9_get_ref_frame_info(FRAME_UPDATE_TYPE update_type, int ref_frame_flags,
7448                             RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES],
7449                             int *ref_frame_coding_indexes,
7450                             int *ref_frame_valid_list) {
7451   if (update_type != KF_UPDATE) {
7452     const VP9_REFFRAME inter_ref_flags[MAX_INTER_REF_FRAMES] = { VP9_LAST_FLAG,
7453                                                                  VP9_GOLD_FLAG,
7454                                                                  VP9_ALT_FLAG };
7455     int i;
7456     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7457       assert(ref_frame_bufs[i] != NULL);
7458       ref_frame_coding_indexes[i] = ref_frame_bufs[i]->frame_coding_index;
7459       ref_frame_valid_list[i] = (ref_frame_flags & inter_ref_flags[i]) != 0;
7460     }
7461   } else {
7462     // No reference frame is available when this is a key frame.
7463     int i;
7464     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7465       ref_frame_coding_indexes[i] = -1;
7466       ref_frame_valid_list[i] = 0;
7467     }
7468   }
7469 }
7470
7471 #if !CONFIG_REALTIME_ONLY
7472 #if CONFIG_RATE_CTRL
7473 static void copy_frame_counts(const FRAME_COUNTS *input_counts,
7474                               FRAME_COUNTS *output_counts) {
7475   int i, j, k, l, m, n;
7476   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
7477     for (j = 0; j < INTRA_MODES; ++j) {
7478       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
7479     }
7480   }
7481   for (i = 0; i < INTRA_MODES; ++i) {
7482     for (j = 0; j < INTRA_MODES; ++j) {
7483       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
7484     }
7485   }
7486   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
7487     for (j = 0; j < PARTITION_TYPES; ++j) {
7488       output_counts->partition[i][j] = input_counts->partition[i][j];
7489     }
7490   }
7491   for (i = 0; i < TX_SIZES; ++i) {
7492     for (j = 0; j < PLANE_TYPES; ++j) {
7493       for (k = 0; k < REF_TYPES; ++k) {
7494         for (l = 0; l < COEF_BANDS; ++l) {
7495           for (m = 0; m < COEFF_CONTEXTS; ++m) {
7496             output_counts->eob_branch[i][j][k][l][m] =
7497                 input_counts->eob_branch[i][j][k][l][m];
7498             for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) {
7499               output_counts->coef[i][j][k][l][m][n] =
7500                   input_counts->coef[i][j][k][l][m][n];
7501             }
7502           }
7503         }
7504       }
7505     }
7506   }
7507   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
7508     for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
7509       output_counts->switchable_interp[i][j] =
7510           input_counts->switchable_interp[i][j];
7511     }
7512   }
7513   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
7514     for (j = 0; j < INTER_MODES; ++j) {
7515       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
7516     }
7517   }
7518   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
7519     for (j = 0; j < 2; ++j) {
7520       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
7521     }
7522   }
7523   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
7524     for (j = 0; j < 2; ++j) {
7525       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
7526     }
7527   }
7528   for (i = 0; i < REF_CONTEXTS; ++i) {
7529     for (j = 0; j < 2; ++j) {
7530       for (k = 0; k < 2; ++k) {
7531         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
7532       }
7533     }
7534   }
7535   for (i = 0; i < REF_CONTEXTS; ++i) {
7536     for (j = 0; j < 2; ++j) {
7537       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
7538     }
7539   }
7540   for (i = 0; i < SKIP_CONTEXTS; ++i) {
7541     for (j = 0; j < 2; ++j) {
7542       output_counts->skip[i][j] = input_counts->skip[i][j];
7543     }
7544   }
7545   for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
7546     for (j = 0; j < TX_SIZES; j++) {
7547       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
7548     }
7549     for (j = 0; j < TX_SIZES - 1; j++) {
7550       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
7551     }
7552     for (j = 0; j < TX_SIZES - 2; j++) {
7553       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
7554     }
7555   }
7556   for (i = 0; i < TX_SIZES; i++) {
7557     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
7558   }
7559   for (i = 0; i < MV_JOINTS; i++) {
7560     output_counts->mv.joints[i] = input_counts->mv.joints[i];
7561   }
7562   for (k = 0; k < 2; k++) {
7563     nmv_component_counts *const comps = &output_counts->mv.comps[k];
7564     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
7565     for (i = 0; i < 2; i++) {
7566       comps->sign[i] = comps_t->sign[i];
7567       comps->class0_hp[i] = comps_t->class0_hp[i];
7568       comps->hp[i] = comps_t->hp[i];
7569     }
7570     for (i = 0; i < MV_CLASSES; i++) {
7571       comps->classes[i] = comps_t->classes[i];
7572     }
7573     for (i = 0; i < CLASS0_SIZE; i++) {
7574       comps->class0[i] = comps_t->class0[i];
7575       for (j = 0; j < MV_FP_SIZE; j++) {
7576         comps->class0_fp[i][j] = comps_t->class0_fp[i][j];
7577       }
7578     }
7579     for (i = 0; i < MV_OFFSET_BITS; i++) {
7580       for (j = 0; j < 2; j++) {
7581         comps->bits[i][j] = comps_t->bits[i][j];
7582       }
7583     }
7584     for (i = 0; i < MV_FP_SIZE; i++) {
7585       comps->fp[i] = comps_t->fp[i];
7586     }
7587   }
7588 }
7589
7590 static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
7591                                         IMAGE_BUFFER *image_buffer) {
7592   const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,
7593                                    yv12_buffer->v_buffer };
7594   const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,
7595                                  yv12_buffer->uv_stride };
7596   const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,
7597                         yv12_buffer->uv_crop_width };
7598   const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,
7599                         yv12_buffer->uv_crop_height };
7600   int plane;
7601   for (plane = 0; plane < 3; ++plane) {
7602     const int src_stride = src_stride_ls[plane];
7603     const int w = w_ls[plane];
7604     const int h = h_ls[plane];
7605     const uint8_t *src_buf = src_buf_ls[plane];
7606     uint8_t *dst_buf = image_buffer->plane_buffer[plane];
7607     int r;
7608     assert(image_buffer->plane_width[plane] == w);
7609     assert(image_buffer->plane_height[plane] == h);
7610     for (r = 0; r < h; ++r) {
7611       memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);
7612       src_buf += src_stride;
7613       dst_buf += w;
7614     }
7615   }
7616 }
7617 #endif  // CONFIG_RATE_CTRL
7618
7619 static void update_encode_frame_result(
7620     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
7621     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
7622     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
7623     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
7624 #if CONFIG_RATE_CTRL
7625     const PARTITION_INFO *partition_info,
7626     const MOTION_VECTOR_INFO *motion_vector_info,
7627     const TplDepStats *tpl_stats_info,
7628 #endif  // CONFIG_RATE_CTRL
7629     ENCODE_FRAME_RESULT *encode_frame_result) {
7630 #if CONFIG_RATE_CTRL
7631   PSNR_STATS psnr;
7632 #if CONFIG_VP9_HIGHBITDEPTH
7633   vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth,
7634                        input_bit_depth);
7635 #else   // CONFIG_VP9_HIGHBITDEPTH
7636   (void)bit_depth;
7637   (void)input_bit_depth;
7638   vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);
7639 #endif  // CONFIG_VP9_HIGHBITDEPTH
7640   encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
7641
7642   vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs,
7643                          encode_frame_result->ref_frame_coding_indexes,
7644                          encode_frame_result->ref_frame_valid_list);
7645
7646   encode_frame_result->psnr = psnr.psnr[0];
7647   encode_frame_result->sse = psnr.sse[0];
7648   copy_frame_counts(counts, &encode_frame_result->frame_counts);
7649   encode_frame_result->partition_info = partition_info;
7650   encode_frame_result->motion_vector_info = motion_vector_info;
7651   encode_frame_result->tpl_stats_info = tpl_stats_info;
7652   if (encode_frame_result->coded_frame.allocated) {
7653     yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
7654                                 &encode_frame_result->coded_frame);
7655   }
7656 #else   // CONFIG_RATE_CTRL
7657   (void)ref_frame_flags;
7658   (void)bit_depth;
7659   (void)input_bit_depth;
7660   (void)source_frame;
7661   (void)coded_frame_buf;
7662   (void)ref_frame_bufs;
7663   (void)counts;
7664 #endif  // CONFIG_RATE_CTRL
7665   encode_frame_result->show_idx = coded_frame_buf->frame_index;
7666   encode_frame_result->update_type = update_type;
7667   encode_frame_result->quantize_index = quantize_index;
7668 }
7669 #endif  // !CONFIG_REALTIME_ONLY
7670
7671 void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
7672   encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
7673 #if CONFIG_RATE_CTRL
7674   encode_frame_result->frame_coding_index = -1;
7675   vp9_zero(encode_frame_result->coded_frame);
7676   encode_frame_result->coded_frame.allocated = 0;
7677   init_rq_history(&encode_frame_result->rq_history);
7678 #endif  // CONFIG_RATE_CTRL
7679 }
7680
7681 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
7682                             size_t *size, uint8_t *dest, int64_t *time_stamp,
7683                             int64_t *time_end, int flush,
7684                             ENCODE_FRAME_RESULT *encode_frame_result) {
7685   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
7686   VP9_COMMON *const cm = &cpi->common;
7687   BufferPool *const pool = cm->buffer_pool;
7688   RATE_CONTROL *const rc = &cpi->rc;
7689   struct vpx_usec_timer cmptimer;
7690   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
7691   struct lookahead_entry *last_source = NULL;
7692   struct lookahead_entry *source = NULL;
7693   int arf_src_index;
7694   const int gf_group_index = cpi->twopass.gf_group.index;
7695   int i;
7696
7697   if (is_one_pass_cbr_svc(cpi)) {
7698     vp9_one_pass_cbr_svc_start_layer(cpi);
7699   }
7700
7701   vpx_usec_timer_start(&cmptimer);
7702
7703   vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
7704
7705   // Is multi-arf enabled.
7706   // Note that at the moment multi_arf is only configured for 2 pass VBR and
7707   // will not work properly with svc.
7708   // Enable the Jingning's new "multi_layer_arf" code if "enable_auto_arf"
7709   // is greater than or equal to 2.
7710   if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
7711     cpi->multi_layer_arf = 1;
7712   else
7713     cpi->multi_layer_arf = 0;
7714
7715   // Normal defaults
7716   cm->reset_frame_context = 0;
7717   cm->refresh_frame_context = 1;
7718   if (!is_one_pass_cbr_svc(cpi)) {
7719     cpi->refresh_last_frame = 1;
7720     cpi->refresh_golden_frame = 0;
7721     cpi->refresh_alt_ref_frame = 0;
7722   }
7723
7724   // Should we encode an arf frame.
7725   arf_src_index = get_arf_src_index(cpi);
7726
7727   if (arf_src_index) {
7728     for (i = 0; i <= arf_src_index; ++i) {
7729       struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
7730       // Avoid creating an alt-ref if there's a forced keyframe pending.
7731       if (e == NULL) {
7732         break;
7733       } else if (e->flags == VPX_EFLAG_FORCE_KF) {
7734         arf_src_index = 0;
7735         flush = 1;
7736         break;
7737       }
7738     }
7739   }
7740
7741   // Clear arf index stack before group of pictures processing starts.
7742   if (gf_group_index == 1) {
7743     stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
7744     cpi->twopass.gf_group.stack_size = 0;
7745   }
7746
7747   if (arf_src_index) {
7748     assert(arf_src_index <= rc->frames_to_key);
7749     if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
7750       cpi->alt_ref_source = source;
7751
7752 #if !CONFIG_REALTIME_ONLY
7753       if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
7754           (oxcf->arnr_strength > 0)) {
7755         int bitrate = cpi->rc.avg_frame_bandwidth / 40;
7756         int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;
7757
7758         int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1);
7759         not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME;
7760
7761         // Produce the filtered ARF frame.
7762         vp9_temporal_filter(cpi, arf_src_index);
7763         vpx_extend_frame_borders(&cpi->alt_ref_buffer);
7764
7765         // for small bitrates segmentation overhead usually
7766         // eats all bitrate gain from enabling delta quantizers
7767         if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
7768           vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);
7769
7770         force_src_buffer = &cpi->alt_ref_buffer;
7771       }
7772 #endif
7773       cm->show_frame = 0;
7774       cm->intra_only = 0;
7775       cpi->refresh_alt_ref_frame = 1;
7776       cpi->refresh_golden_frame = 0;
7777       cpi->refresh_last_frame = 0;
7778       rc->is_src_frame_alt_ref = 0;
7779       rc->source_alt_ref_pending = 0;
7780     } else {
7781       rc->source_alt_ref_pending = 0;
7782     }
7783   }
7784
7785   if (!source) {
7786     // Get last frame source.
7787     if (cm->current_video_frame > 0) {
7788       if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
7789         return -1;
7790     }
7791
7792     // Read in the source frame.
7793     if (cpi->use_svc || cpi->svc.set_intra_only_frame)
7794       source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
7795     else
7796       source = vp9_lookahead_pop(cpi->lookahead, flush);
7797
7798     if (source != NULL) {
7799       cm->show_frame = 1;
7800       cm->intra_only = 0;
7801       // If the flags indicate intra frame, but if the current picture is for
7802       // spatial layer above first_spatial_layer_to_encode, it should not be an
7803       // intra picture.
7804       if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
7805           cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
7806         source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
7807       }
7808
7809       // Check to see if the frame should be encoded as an arf overlay.
7810       check_src_altref(cpi, source);
7811     }
7812   }
7813
7814   if (source) {
7815     cpi->un_scaled_source = cpi->Source =
7816         force_src_buffer ? force_src_buffer : &source->img;
7817
7818 #ifdef ENABLE_KF_DENOISE
7819     // Copy of raw source for metrics calculation.
7820     if (is_psnr_calc_enabled(cpi))
7821       vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
7822 #endif
7823
7824     cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
7825
7826     *time_stamp = source->ts_start;
7827     *time_end = source->ts_end;
7828     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
7829   } else {
7830     *size = 0;
7831     return -1;
7832   }
7833
7834   if (source->ts_start < cpi->first_time_stamp_ever) {
7835     cpi->first_time_stamp_ever = source->ts_start;
7836     cpi->last_end_time_stamp_seen = source->ts_start;
7837   }
7838
7839   // Clear down mmx registers
7840   vpx_clear_system_state();
7841
7842   // adjust frame rates based on timestamps given
7843   if (cm->show_frame) {
7844     if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
7845         cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
7846       vp9_svc_adjust_frame_rate(cpi);
7847     else
7848       adjust_frame_rate(cpi, source);
7849   }
7850
7851   if (is_one_pass_cbr_svc(cpi)) {
7852     vp9_update_temporal_layer_framerate(cpi);
7853     vp9_restore_layer_context(cpi);
7854   }
7855
7856   // Find a free buffer for the new frame, releasing the reference previously
7857   // held.
7858   if (cm->new_fb_idx != INVALID_IDX) {
7859     --pool->frame_bufs[cm->new_fb_idx].ref_count;
7860   }
7861   cm->new_fb_idx = get_free_fb(cm);
7862
7863   if (cm->new_fb_idx == INVALID_IDX) return -1;
7864   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
7865   // If the frame buffer for current frame is the same as previous frame, MV in
7866   // the base layer shouldn't be used as it'll cause data race.
7867   if (cm->cur_frame == cm->prev_frame) {
7868     cpi->svc.use_base_mv = 0;
7869   }
7870   // Start with a 0 size frame.
7871   *size = 0;
7872
7873   cpi->frame_flags = *frame_flags;
7874
7875 #if !CONFIG_REALTIME_ONLY
7876   if ((oxcf->pass == 2) && !cpi->use_svc) {
7877     vp9_rc_get_second_pass_params(cpi);
7878   } else if (oxcf->pass == 1) {
7879     set_frame_size(cpi);
7880   }
7881 #endif  // !CONFIG_REALTIME_ONLY
7882
7883   if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
7884       cpi->level_constraint.fail_flag == 0)
7885     level_rc_framerate(cpi, arf_src_index);
7886
7887   if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
7888     for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
7889   }
7890
7891   if (cpi->kmeans_data_arr_alloc == 0) {
7892     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7893     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7894 #if CONFIG_MULTITHREAD
7895     pthread_mutex_init(&cpi->kmeans_mutex, NULL);
7896 #endif
7897     CHECK_MEM_ERROR(
7898         cm, cpi->kmeans_data_arr,
7899         vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
7900     cpi->kmeans_data_stride = mi_cols;
7901     cpi->kmeans_data_arr_alloc = 1;
7902   }
7903
7904 #if CONFIG_NON_GREEDY_MV
7905   {
7906     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7907     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7908     Status status = vp9_alloc_motion_field_info(
7909         &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
7910     if (status == STATUS_FAILED) {
7911       vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
7912                          "vp9_alloc_motion_field_info failed");
7913     }
7914   }
7915 #endif  // CONFIG_NON_GREEDY_MV
7916
7917   if (gf_group_index == 1 &&
7918       cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
7919       cpi->sf.enable_tpl_model) {
7920     init_tpl_buffer(cpi);
7921     vp9_estimate_qp_gop(cpi);
7922     setup_tpl_stats(cpi);
7923   }
7924
7925 #if CONFIG_BITSTREAM_DEBUG
7926   assert(cpi->oxcf.max_threads == 0 &&
7927          "bitstream debug tool does not support multithreading");
7928   bitstream_queue_record_write();
7929 #endif
7930 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
7931   bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
7932 #endif
7933
7934   cpi->td.mb.fp_src_pred = 0;
7935 #if CONFIG_REALTIME_ONLY
7936   (void)encode_frame_result;
7937   if (cpi->use_svc) {
7938     SvcEncode(cpi, size, dest, frame_flags);
7939   } else {
7940     // One pass encode
7941     Pass0Encode(cpi, size, dest, frame_flags);
7942   }
7943 #else  // !CONFIG_REALTIME_ONLY
7944   if (oxcf->pass == 1 && !cpi->use_svc) {
7945     const int lossless = is_lossless_requested(oxcf);
7946 #if CONFIG_VP9_HIGHBITDEPTH
7947     if (cpi->oxcf.use_highbitdepth)
7948       cpi->td.mb.fwd_txfm4x4 =
7949           lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
7950     else
7951       cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7952     cpi->td.mb.highbd_inv_txfm_add =
7953         lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
7954 #else
7955     cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7956 #endif  // CONFIG_VP9_HIGHBITDEPTH
7957     cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
7958     vp9_first_pass(cpi, source);
7959   } else if (oxcf->pass == 2 && !cpi->use_svc) {
7960     Pass2Encode(cpi, size, dest, frame_flags, encode_frame_result);
7961     vp9_twopass_postencode_update(cpi);
7962   } else if (cpi->use_svc) {
7963     SvcEncode(cpi, size, dest, frame_flags);
7964   } else {
7965     // One pass encode
7966     Pass0Encode(cpi, size, dest, frame_flags);
7967   }
7968 #endif  // CONFIG_REALTIME_ONLY
7969
7970   if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
7971
7972   if (cm->refresh_frame_context)
7973     cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
7974
7975   // No frame encoded, or frame was dropped, release scaled references.
7976   if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
7977     release_scaled_references(cpi);
7978   }
7979
7980   if (*size > 0) {
7981     cpi->droppable = !frame_is_reference(cpi);
7982   }
7983
7984   // Save layer specific state.
7985   if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 ||
7986                                     cpi->svc.number_spatial_layers > 1) &&
7987                                    oxcf->pass == 2)) {
7988     vp9_save_layer_context(cpi);
7989   }
7990
7991   vpx_usec_timer_mark(&cmptimer);
7992   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
7993
7994   if (cpi->keep_level_stats && oxcf->pass != 1)
7995     update_level_info(cpi, size, arf_src_index);
7996
7997 #if CONFIG_INTERNAL_STATS
7998
7999   if (oxcf->pass != 1) {
8000     double samples = 0.0;
8001     cpi->bytes += (int)(*size);
8002
8003     if (cm->show_frame) {
8004       uint32_t bit_depth = 8;
8005       uint32_t in_bit_depth = 8;
8006       cpi->count++;
8007 #if CONFIG_VP9_HIGHBITDEPTH
8008       if (cm->use_highbitdepth) {
8009         in_bit_depth = cpi->oxcf.input_bit_depth;
8010         bit_depth = cm->bit_depth;
8011       }
8012 #endif
8013
8014       if (cpi->b_calculate_psnr) {
8015         YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
8016         YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
8017         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
8018         PSNR_STATS psnr;
8019 #if CONFIG_VP9_HIGHBITDEPTH
8020         vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
8021                              in_bit_depth);
8022 #else
8023         vpx_calc_psnr(orig, recon, &psnr);
8024 #endif  // CONFIG_VP9_HIGHBITDEPTH
8025
8026         adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
8027                           psnr.psnr[0], &cpi->psnr);
8028         cpi->total_sq_error += psnr.sse[0];
8029         cpi->total_samples += psnr.samples[0];
8030         samples = psnr.samples[0];
8031
8032         {
8033           PSNR_STATS psnr2;
8034           double frame_ssim2 = 0, weight = 0;
8035 #if CONFIG_VP9_POSTPROC
8036           if (vpx_alloc_frame_buffer(
8037                   pp, recon->y_crop_width, recon->y_crop_height,
8038                   cm->subsampling_x, cm->subsampling_y,
8039 #if CONFIG_VP9_HIGHBITDEPTH
8040                   cm->use_highbitdepth,
8041 #endif
8042                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) {
8043             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
8044                                "Failed to allocate post processing buffer");
8045           }
8046           {
8047             vp9_ppflags_t ppflags;
8048             ppflags.post_proc_flag = VP9D_DEBLOCK;
8049             ppflags.deblocking_level = 0;  // not used in vp9_post_proc_frame()
8050             ppflags.noise_level = 0;       // not used in vp9_post_proc_frame()
8051             vp9_post_proc_frame(cm, pp, &ppflags,
8052                                 cpi->un_scaled_source->y_width);
8053           }
8054 #endif
8055           vpx_clear_system_state();
8056
8057 #if CONFIG_VP9_HIGHBITDEPTH
8058           vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
8059                                cpi->oxcf.input_bit_depth);
8060 #else
8061           vpx_calc_psnr(orig, pp, &psnr2);
8062 #endif  // CONFIG_VP9_HIGHBITDEPTH
8063
8064           cpi->totalp_sq_error += psnr2.sse[0];
8065           cpi->totalp_samples += psnr2.samples[0];
8066           adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
8067                             psnr2.psnr[0], &cpi->psnrp);
8068
8069 #if CONFIG_VP9_HIGHBITDEPTH
8070           if (cm->use_highbitdepth) {
8071             frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth,
8072                                                in_bit_depth);
8073           } else {
8074             frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
8075           }
8076 #else
8077           frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
8078 #endif  // CONFIG_VP9_HIGHBITDEPTH
8079
8080           cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
8081           cpi->summed_quality += frame_ssim2 * weight;
8082           cpi->summed_weights += weight;
8083
8084 #if CONFIG_VP9_HIGHBITDEPTH
8085           if (cm->use_highbitdepth) {
8086             frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth,
8087                                                in_bit_depth);
8088           } else {
8089             frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
8090           }
8091 #else
8092           frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
8093 #endif  // CONFIG_VP9_HIGHBITDEPTH
8094
8095           cpi->summedp_quality += frame_ssim2 * weight;
8096           cpi->summedp_weights += weight;
8097 #if 0
8098           if (cm->show_frame) {
8099             FILE *f = fopen("q_used.stt", "a");
8100             fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
8101                     cpi->common.current_video_frame, psnr2.psnr[1],
8102                     psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
8103             fclose(f);
8104           }
8105 #endif
8106         }
8107       }
8108       if (cpi->b_calculate_blockiness) {
8109 #if CONFIG_VP9_HIGHBITDEPTH
8110         if (!cm->use_highbitdepth)
8111 #endif
8112         {
8113           double frame_blockiness = vp9_get_blockiness(
8114               cpi->Source->y_buffer, cpi->Source->y_stride,
8115               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
8116               cpi->Source->y_width, cpi->Source->y_height);
8117           cpi->worst_blockiness =
8118               VPXMAX(cpi->worst_blockiness, frame_blockiness);
8119           cpi->total_blockiness += frame_blockiness;
8120         }
8121       }
8122
8123       if (cpi->b_calculate_consistency) {
8124 #if CONFIG_VP9_HIGHBITDEPTH
8125         if (!cm->use_highbitdepth)
8126 #endif
8127         {
8128           double this_inconsistency = vpx_get_ssim_metrics(
8129               cpi->Source->y_buffer, cpi->Source->y_stride,
8130               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
8131               cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
8132               &cpi->metrics, 1);
8133
8134           const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
8135           double consistency =
8136               vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency);
8137           if (consistency > 0.0)
8138             cpi->worst_consistency =
8139                 VPXMIN(cpi->worst_consistency, consistency);
8140           cpi->total_inconsistency += this_inconsistency;
8141         }
8142       }
8143
8144       {
8145         double y, u, v, frame_all;
8146         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
8147                                       &v, bit_depth, in_bit_depth);
8148         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
8149       }
8150       {
8151         double y, u, v, frame_all;
8152         frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
8153                                 bit_depth, in_bit_depth);
8154         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
8155       }
8156     }
8157   }
8158
8159 #endif
8160
8161   if (is_one_pass_cbr_svc(cpi)) {
8162     if (cm->show_frame) {
8163       ++cpi->svc.spatial_layer_to_encode;
8164       if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
8165         cpi->svc.spatial_layer_to_encode = 0;
8166     }
8167   }
8168
8169   vpx_clear_system_state();
8170   return 0;
8171 }
8172
8173 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
8174                               vp9_ppflags_t *flags) {
8175   VP9_COMMON *cm = &cpi->common;
8176 #if !CONFIG_VP9_POSTPROC
8177   (void)flags;
8178 #endif
8179
8180   if (!cm->show_frame) {
8181     return -1;
8182   } else {
8183     int ret;
8184 #if CONFIG_VP9_POSTPROC
8185     ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
8186 #else
8187     if (cm->frame_to_show) {
8188       *dest = *cm->frame_to_show;
8189       dest->y_width = cm->width;
8190       dest->y_height = cm->height;
8191       dest->uv_width = cm->width >> cm->subsampling_x;
8192       dest->uv_height = cm->height >> cm->subsampling_y;
8193       ret = 0;
8194     } else {
8195       ret = -1;
8196     }
8197 #endif  // !CONFIG_VP9_POSTPROC
8198     vpx_clear_system_state();
8199     return ret;
8200   }
8201 }
8202
8203 int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
8204                           VPX_SCALING vert_mode) {
8205   VP9_COMMON *cm = &cpi->common;
8206   int hr = 0, hs = 0, vr = 0, vs = 0;
8207
8208   if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
8209
8210   Scale2Ratio(horiz_mode, &hr, &hs);
8211   Scale2Ratio(vert_mode, &vr, &vs);
8212
8213   // always go to the next whole number
8214   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
8215   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
8216   if (cm->current_video_frame) {
8217     assert(cm->width <= cpi->initial_width);
8218     assert(cm->height <= cpi->initial_height);
8219   }
8220
8221   update_frame_size(cpi);
8222
8223   return 0;
8224 }
8225
8226 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
8227                          unsigned int height) {
8228   VP9_COMMON *cm = &cpi->common;
8229 #if CONFIG_VP9_HIGHBITDEPTH
8230   update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
8231 #else
8232   update_initial_width(cpi, 0, 1, 1);
8233 #endif  // CONFIG_VP9_HIGHBITDEPTH
8234
8235 #if CONFIG_VP9_TEMPORAL_DENOISING
8236   setup_denoiser_buffer(cpi);
8237 #endif
8238   alloc_raw_frame_buffers(cpi);
8239   if (width) {
8240     cm->width = width;
8241     if (cm->width > cpi->initial_width) {
8242       cm->width = cpi->initial_width;
8243       printf("Warning: Desired width too large, changed to %d\n", cm->width);
8244     }
8245   }
8246
8247   if (height) {
8248     cm->height = height;
8249     if (cm->height > cpi->initial_height) {
8250       cm->height = cpi->initial_height;
8251       printf("Warning: Desired height too large, changed to %d\n", cm->height);
8252     }
8253   }
8254   assert(cm->width <= cpi->initial_width);
8255   assert(cm->height <= cpi->initial_height);
8256
8257   update_frame_size(cpi);
8258
8259   return 0;
8260 }
8261
8262 void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
8263   cpi->use_svc = use_svc;
8264   return;
8265 }
8266
8267 int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
8268
8269 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
8270   if (flags &
8271       (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) {
8272     int ref = 7;
8273
8274     if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG;
8275
8276     if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG;
8277
8278     if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG;
8279
8280     vp9_use_as_reference(cpi, ref);
8281   }
8282
8283   if (flags &
8284       (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
8285        VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) {
8286     int upd = 7;
8287
8288     if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG;
8289
8290     if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
8291
8292     if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG;
8293
8294     vp9_update_reference(cpi, upd);
8295   }
8296
8297   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
8298     vp9_update_entropy(cpi, 0);
8299   }
8300 }
8301
8302 void vp9_set_row_mt(VP9_COMP *cpi) {
8303   // Enable row based multi-threading for supported modes of encoding
8304   cpi->row_mt = 0;
8305   if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
8306        cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
8307       cpi->oxcf.row_mt && !cpi->use_svc)
8308     cpi->row_mt = 1;
8309
8310   if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
8311       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
8312       !cpi->use_svc)
8313     cpi->row_mt = 1;
8314
8315   // In realtime mode, enable row based multi-threading for all the speed levels
8316   // where non-rd path is used.
8317   if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) {
8318     cpi->row_mt = 1;
8319   }
8320
8321   if (cpi->row_mt)
8322     cpi->row_mt_bit_exact = 1;
8323   else
8324     cpi->row_mt_bit_exact = 0;
8325 }