granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encoder.c

   1 /*
   2  * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <limits.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_config.h"
  18 #include "./vpx_dsp_rtcd.h"
  19 #include "./vpx_scale_rtcd.h"
  20 #include "vpx_dsp/psnr.h"
  21 #include "vpx_dsp/vpx_dsp_common.h"
  22 #include "vpx_dsp/vpx_filter.h"
  23 #if CONFIG_INTERNAL_STATS
  24 #include "vpx_dsp/ssim.h"
  25 #endif
  26 #include "vpx_ports/mem.h"
  27 #include "vpx_ports/system_state.h"
  28 #include "vpx_ports/vpx_timer.h"
  29 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  30 #include "vpx_util/vpx_debug_util.h"
  31 #endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  32
  33 #include "vp9/common/vp9_alloccommon.h"
  34 #include "vp9/common/vp9_filter.h"
  35 #include "vp9/common/vp9_idct.h"
  36 #if CONFIG_NON_GREEDY_MV
  37 #include "vp9/common/vp9_mvref_common.h"
  38 #endif
  39 #if CONFIG_VP9_POSTPROC
  40 #include "vp9/common/vp9_postproc.h"
  41 #endif
  42 #include "vp9/common/vp9_reconinter.h"
  43 #include "vp9/common/vp9_reconintra.h"
  44 #include "vp9/common/vp9_tile_common.h"
  45 #include "vp9/common/vp9_scan.h"
  46
  47 #if !CONFIG_REALTIME_ONLY
  48 #include "vp9/encoder/vp9_alt_ref_aq.h"
  49 #include "vp9/encoder/vp9_aq_360.h"
  50 #include "vp9/encoder/vp9_aq_complexity.h"
  51 #endif
  52 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  53 #if !CONFIG_REALTIME_ONLY
  54 #include "vp9/encoder/vp9_aq_variance.h"
  55 #endif
  56 #include "vp9/encoder/vp9_bitstream.h"
  57 #if CONFIG_INTERNAL_STATS
  58 #include "vp9/encoder/vp9_blockiness.h"
  59 #endif
  60 #include "vp9/encoder/vp9_context_tree.h"
  61 #include "vp9/encoder/vp9_encodeframe.h"
  62 #include "vp9/encoder/vp9_encodemb.h"
  63 #include "vp9/encoder/vp9_encodemv.h"
  64 #include "vp9/encoder/vp9_encoder.h"
  65 #include "vp9/encoder/vp9_ethread.h"
  66 #include "vp9/encoder/vp9_extend.h"
  67 #include "vp9/encoder/vp9_firstpass.h"
  68 #include "vp9/encoder/vp9_mbgraph.h"
  69 #if CONFIG_NON_GREEDY_MV
  70 #include "vp9/encoder/vp9_mcomp.h"
  71 #endif
  72 #include "vp9/encoder/vp9_multi_thread.h"
  73 #include "vp9/encoder/vp9_noise_estimate.h"
  74 #include "vp9/encoder/vp9_picklpf.h"
  75 #include "vp9/encoder/vp9_ratectrl.h"
  76 #include "vp9/encoder/vp9_rd.h"
  77 #include "vp9/encoder/vp9_resize.h"
  78 #include "vp9/encoder/vp9_segmentation.h"
  79 #include "vp9/encoder/vp9_skin_detection.h"
  80 #include "vp9/encoder/vp9_speed_features.h"
  81 #include "vp9/encoder/vp9_svc_layercontext.h"
  82 #include "vp9/encoder/vp9_temporal_filter.h"
  83 #include "vp9/vp9_cx_iface.h"
  84
  85 #define AM_SEGMENT_ID_INACTIVE 7
  86 #define AM_SEGMENT_ID_ACTIVE 0
  87
  88 // Whether to use high precision mv for altref computation.
  89 #define ALTREF_HIGH_PRECISION_MV 1
  90
  91 // Q threshold for high precision mv. Choose a very high value for now so that
  92 // HIGH_PRECISION is always chosen.
  93 #define HIGH_PRECISION_MV_QTHRESH 200
  94
  95 #define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
  96 #define FRAME_RATE_FACTOR 8
  97
  98 #ifdef OUTPUT_YUV_DENOISED
  99 FILE *yuv_denoised_file = NULL;
 100 #endif
 101 #ifdef OUTPUT_YUV_SKINMAP
 102 static FILE *yuv_skinmap_file = NULL;
 103 #endif
 104 #ifdef OUTPUT_YUV_REC
 105 FILE *yuv_rec_file;
 106 #endif
 107 #ifdef OUTPUT_YUV_SVC_SRC
 108 FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
 109 #endif
 110
 111 #if 0
 112 FILE *framepsnr;
 113 FILE *kf_list;
 114 FILE *keyfile;
 115 #endif
 116
 117 #ifdef ENABLE_KF_DENOISE
 118 // Test condition for spatial denoise of source.
 119 static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 120   VP9_COMMON *const cm = &cpi->common;
 121   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 122
 123   return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) &&
 124          frame_is_intra_only(cm);
 125 }
 126 #endif
 127
 128 #if CONFIG_VP9_HIGHBITDEPTH
 129 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 130                          TX_SIZE tx_size);
 131 #endif
 132 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 133                   TX_SIZE tx_size);
 134
 135 #if !CONFIG_REALTIME_ONLY
 136 // compute adaptive threshold for skip recoding
 137 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
 138   const VP9_COMMON *const cm = &cpi->common;
 139   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 140   const int frame_size = (cm->width * cm->height) >> 10;
 141   const int bitrate = (int)(oxcf->target_bandwidth >> 10);
 142   const int qindex_factor = cm->base_qindex + (MAXQ >> 1);
 143
 144   // This equation makes the threshold adaptive to frame size.
 145   // Coding gain obtained by recoding comes from alternate frames of large
 146   // content change. We skip recoding if the difference of previous and current
 147   // frame context probability model is less than a certain threshold.
 148   // The first component is the most critical part to guarantee adaptivity.
 149   // Other parameters are estimated based on normal setting of hd resolution
 150   // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50
 151   const int thresh =
 152       ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) *
 153        qindex_factor) >>
 154       9;
 155
 156   return thresh;
 157 }
 158
 159 // compute the total cost difference between current
 160 // and previous frame context prob model.
 161 static int compute_context_model_diff(const VP9_COMMON *const cm) {
 162   const FRAME_CONTEXT *const pre_fc =
 163       &cm->frame_contexts[cm->frame_context_idx];
 164   const FRAME_CONTEXT *const cur_fc = cm->fc;
 165   const FRAME_COUNTS *counts = &cm->counts;
 166   vpx_prob pre_last_prob, cur_last_prob;
 167   int diff = 0;
 168   int i, j, k, l, m, n;
 169
 170   // y_mode_prob
 171   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
 172     for (j = 0; j < INTRA_MODES - 1; ++j) {
 173       diff += (int)counts->y_mode[i][j] *
 174               (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]);
 175     }
 176     pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2];
 177     cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2];
 178
 179     diff += (int)counts->y_mode[i][INTRA_MODES - 1] *
 180             (pre_last_prob - cur_last_prob);
 181   }
 182
 183   // uv_mode_prob
 184   for (i = 0; i < INTRA_MODES; ++i) {
 185     for (j = 0; j < INTRA_MODES - 1; ++j) {
 186       diff += (int)counts->uv_mode[i][j] *
 187               (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]);
 188     }
 189     pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2];
 190     cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2];
 191
 192     diff += (int)counts->uv_mode[i][INTRA_MODES - 1] *
 193             (pre_last_prob - cur_last_prob);
 194   }
 195
 196   // partition_prob
 197   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
 198     for (j = 0; j < PARTITION_TYPES - 1; ++j) {
 199       diff += (int)counts->partition[i][j] *
 200               (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]);
 201     }
 202     pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2];
 203     cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2];
 204
 205     diff += (int)counts->partition[i][PARTITION_TYPES - 1] *
 206             (pre_last_prob - cur_last_prob);
 207   }
 208
 209   // coef_probs
 210   for (i = 0; i < TX_SIZES; ++i) {
 211     for (j = 0; j < PLANE_TYPES; ++j) {
 212       for (k = 0; k < REF_TYPES; ++k) {
 213         for (l = 0; l < COEF_BANDS; ++l) {
 214           for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) {
 215             for (n = 0; n < UNCONSTRAINED_NODES; ++n) {
 216               diff += (int)counts->coef[i][j][k][l][m][n] *
 217                       (pre_fc->coef_probs[i][j][k][l][m][n] -
 218                        cur_fc->coef_probs[i][j][k][l][m][n]);
 219             }
 220
 221             pre_last_prob =
 222                 MAX_PROB -
 223                 pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 224             cur_last_prob =
 225                 MAX_PROB -
 226                 cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 227
 228             diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] *
 229                     (pre_last_prob - cur_last_prob);
 230           }
 231         }
 232       }
 233     }
 234   }
 235
 236   // switchable_interp_prob
 237   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
 238     for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) {
 239       diff += (int)counts->switchable_interp[i][j] *
 240               (pre_fc->switchable_interp_prob[i][j] -
 241                cur_fc->switchable_interp_prob[i][j]);
 242     }
 243     pre_last_prob =
 244         MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 245     cur_last_prob =
 246         MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 247
 248     diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] *
 249             (pre_last_prob - cur_last_prob);
 250   }
 251
 252   // inter_mode_probs
 253   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
 254     for (j = 0; j < INTER_MODES - 1; ++j) {
 255       diff += (int)counts->inter_mode[i][j] *
 256               (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]);
 257     }
 258     pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2];
 259     cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2];
 260
 261     diff += (int)counts->inter_mode[i][INTER_MODES - 1] *
 262             (pre_last_prob - cur_last_prob);
 263   }
 264
 265   // intra_inter_prob
 266   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
 267     diff += (int)counts->intra_inter[i][0] *
 268             (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]);
 269
 270     pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i];
 271     cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i];
 272
 273     diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob);
 274   }
 275
 276   // comp_inter_prob
 277   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
 278     diff += (int)counts->comp_inter[i][0] *
 279             (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]);
 280
 281     pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i];
 282     cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i];
 283
 284     diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob);
 285   }
 286
 287   // single_ref_prob
 288   for (i = 0; i < REF_CONTEXTS; ++i) {
 289     for (j = 0; j < 2; ++j) {
 290       diff += (int)counts->single_ref[i][j][0] *
 291               (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]);
 292
 293       pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j];
 294       cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j];
 295
 296       diff +=
 297           (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob);
 298     }
 299   }
 300
 301   // comp_ref_prob
 302   for (i = 0; i < REF_CONTEXTS; ++i) {
 303     diff += (int)counts->comp_ref[i][0] *
 304             (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]);
 305
 306     pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i];
 307     cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i];
 308
 309     diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob);
 310   }
 311
 312   // tx_probs
 313   for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
 314     // p32x32
 315     for (j = 0; j < TX_SIZES - 1; ++j) {
 316       diff += (int)counts->tx.p32x32[i][j] *
 317               (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]);
 318     }
 319     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 320     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 321
 322     diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] *
 323             (pre_last_prob - cur_last_prob);
 324
 325     // p16x16
 326     for (j = 0; j < TX_SIZES - 2; ++j) {
 327       diff += (int)counts->tx.p16x16[i][j] *
 328               (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]);
 329     }
 330     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 331     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 332
 333     diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] *
 334             (pre_last_prob - cur_last_prob);
 335
 336     // p8x8
 337     for (j = 0; j < TX_SIZES - 3; ++j) {
 338       diff += (int)counts->tx.p8x8[i][j] *
 339               (pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]);
 340     }
 341     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 342     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 343
 344     diff +=
 345         (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob);
 346   }
 347
 348   // skip_probs
 349   for (i = 0; i < SKIP_CONTEXTS; ++i) {
 350     diff += (int)counts->skip[i][0] *
 351             (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]);
 352
 353     pre_last_prob = MAX_PROB - pre_fc->skip_probs[i];
 354     cur_last_prob = MAX_PROB - cur_fc->skip_probs[i];
 355
 356     diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob);
 357   }
 358
 359   // mv
 360   for (i = 0; i < MV_JOINTS - 1; ++i) {
 361     diff += (int)counts->mv.joints[i] *
 362             (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]);
 363   }
 364   pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2];
 365   cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2];
 366
 367   diff +=
 368       (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob);
 369
 370   for (i = 0; i < 2; ++i) {
 371     const nmv_component_counts *nmv_count = &counts->mv.comps[i];
 372     const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i];
 373     const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i];
 374
 375     // sign
 376     diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign);
 377
 378     pre_last_prob = MAX_PROB - pre_nmv_prob->sign;
 379     cur_last_prob = MAX_PROB - cur_nmv_prob->sign;
 380
 381     diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob);
 382
 383     // classes
 384     for (j = 0; j < MV_CLASSES - 1; ++j) {
 385       diff += (int)nmv_count->classes[j] *
 386               (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]);
 387     }
 388     pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2];
 389     cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2];
 390
 391     diff += (int)nmv_count->classes[MV_CLASSES - 1] *
 392             (pre_last_prob - cur_last_prob);
 393
 394     // class0
 395     for (j = 0; j < CLASS0_SIZE - 1; ++j) {
 396       diff += (int)nmv_count->class0[j] *
 397               (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]);
 398     }
 399     pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2];
 400     cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2];
 401
 402     diff += (int)nmv_count->class0[CLASS0_SIZE - 1] *
 403             (pre_last_prob - cur_last_prob);
 404
 405     // bits
 406     for (j = 0; j < MV_OFFSET_BITS; ++j) {
 407       diff += (int)nmv_count->bits[j][0] *
 408               (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]);
 409
 410       pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j];
 411       cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j];
 412
 413       diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob);
 414     }
 415
 416     // class0_fp
 417     for (j = 0; j < CLASS0_SIZE; ++j) {
 418       for (k = 0; k < MV_FP_SIZE - 1; ++k) {
 419         diff += (int)nmv_count->class0_fp[j][k] *
 420                 (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]);
 421       }
 422       pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 423       cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 424
 425       diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] *
 426               (pre_last_prob - cur_last_prob);
 427     }
 428
 429     // fp
 430     for (j = 0; j < MV_FP_SIZE - 1; ++j) {
 431       diff +=
 432           (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]);
 433     }
 434     pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2];
 435     cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2];
 436
 437     diff +=
 438         (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob);
 439
 440     // class0_hp
 441     diff += (int)nmv_count->class0_hp[0] *
 442             (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp);
 443
 444     pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp;
 445     cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp;
 446
 447     diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob);
 448
 449     // hp
 450     diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp);
 451
 452     pre_last_prob = MAX_PROB - pre_nmv_prob->hp;
 453     cur_last_prob = MAX_PROB - cur_nmv_prob->hp;
 454
 455     diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob);
 456   }
 457
 458   return -diff;
 459 }
 460 #endif  // !CONFIG_REALTIME_ONLY
 461
 462 // Test for whether to calculate metrics for the frame.
 463 static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
 464   const VP9_COMMON *const cm = &cpi->common;
 465   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 466
 467   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
 468 }
 469
 470 /* clang-format off */
 471 const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
 472   //         sample rate    size   breadth  bitrate  cpb
 473   { LEVEL_1,   829440,      36864,    512,   200,    400,    2, 1,  4,  8 },
 474   { LEVEL_1_1, 2764800,     73728,    768,   800,    1000,   2, 1,  4,  8 },
 475   { LEVEL_2,   4608000,     122880,   960,   1800,   1500,   2, 1,  4,  8 },
 476   { LEVEL_2_1, 9216000,     245760,   1344,  3600,   2800,   2, 2,  4,  8 },
 477   { LEVEL_3,   20736000,    552960,   2048,  7200,   6000,   2, 4,  4,  8 },
 478   { LEVEL_3_1, 36864000,    983040,   2752,  12000,  10000,  2, 4,  4,  8 },
 479   { LEVEL_4,   83558400,    2228224,  4160,  18000,  16000,  4, 4,  4,  8 },
 480   { LEVEL_4_1, 160432128,   2228224,  4160,  30000,  18000,  4, 4,  5,  6 },
 481   { LEVEL_5,   311951360,   8912896,  8384,  60000,  36000,  6, 8,  6,  4 },
 482   { LEVEL_5_1, 588251136,   8912896,  8384,  120000, 46000,  8, 8,  10, 4 },
 483   // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
 484   // they are finalized (currently tentative).
 485   { LEVEL_5_2, 1176502272,  8912896,  8384,  180000, 90000,  8, 8,  10, 4 },
 486   { LEVEL_6,   1176502272,  35651584, 16832, 180000, 90000,  8, 16, 10, 4 },
 487   { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
 488   { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
 489 };
 490 /* clang-format on */
 491
 492 static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
 493   "The average bit-rate is too high.",
 494   "The picture size is too large.",
 495   "The picture width/height is too large.",
 496   "The luma sample rate is too large.",
 497   "The CPB size is too large.",
 498   "The compression ratio is too small",
 499   "Too many column tiles are used.",
 500   "The alt-ref distance is too small.",
 501   "Too many reference buffers are used."
 502 };
 503
 504 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
 505   switch (mode) {
 506     case NORMAL:
 507       *hr = 1;
 508       *hs = 1;
 509       break;
 510     case FOURFIVE:
 511       *hr = 4;
 512       *hs = 5;
 513       break;
 514     case THREEFIVE:
 515       *hr = 3;
 516       *hs = 5;
 517       break;
 518     default:
 519       assert(mode == ONETWO);
 520       *hr = 1;
 521       *hs = 2;
 522       break;
 523   }
 524 }
 525
 526 // Mark all inactive blocks as active. Other segmentation features may be set
 527 // so memset cannot be used, instead only inactive blocks should be reset.
 528 static void suppress_active_map(VP9_COMP *cpi) {
 529   unsigned char *const seg_map = cpi->segmentation_map;
 530
 531   if (cpi->active_map.enabled || cpi->active_map.update) {
 532     const int rows = cpi->common.mi_rows;
 533     const int cols = cpi->common.mi_cols;
 534     int i;
 535
 536     for (i = 0; i < rows * cols; ++i)
 537       if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
 538         seg_map[i] = AM_SEGMENT_ID_ACTIVE;
 539   }
 540 }
 541
 542 static void apply_active_map(VP9_COMP *cpi) {
 543   struct segmentation *const seg = &cpi->common.seg;
 544   unsigned char *const seg_map = cpi->segmentation_map;
 545   const unsigned char *const active_map = cpi->active_map.map;
 546   int i;
 547
 548   assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
 549
 550   if (frame_is_intra_only(&cpi->common)) {
 551     cpi->active_map.enabled = 0;
 552     cpi->active_map.update = 1;
 553   }
 554
 555   if (cpi->active_map.update) {
 556     if (cpi->active_map.enabled) {
 557       for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
 558         if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
 559       vp9_enable_segmentation(seg);
 560       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 561       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 562       // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
 563       // filter level being zero regardless of the value of seg->abs_delta.
 564       vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
 565                       -MAX_LOOP_FILTER);
 566     } else {
 567       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 568       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 569       if (seg->enabled) {
 570         seg->update_data = 1;
 571         seg->update_map = 1;
 572       }
 573     }
 574     cpi->active_map.update = 0;
 575   }
 576 }
 577
 578 static void apply_roi_map(VP9_COMP *cpi) {
 579   VP9_COMMON *cm = &cpi->common;
 580   struct segmentation *const seg = &cm->seg;
 581   vpx_roi_map_t *roi = &cpi->roi;
 582   const int *delta_q = roi->delta_q;
 583   const int *delta_lf = roi->delta_lf;
 584   const int *skip = roi->skip;
 585   int ref_frame[8];
 586   int internal_delta_q[MAX_SEGMENTS];
 587   int i;
 588   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
 589                                     VP9_ALT_FLAG };
 590
 591   // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
 592   // realtime mode.
 593   if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
 594   if (!roi->enabled) return;
 595
 596   memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
 597
 598   vp9_enable_segmentation(seg);
 599   vp9_clearall_segfeatures(seg);
 600   // Select delta coding method;
 601   seg->abs_delta = SEGMENT_DELTADATA;
 602
 603   memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
 604
 605   for (i = 0; i < MAX_SEGMENTS; ++i) {
 606     // Translate the external delta q values to internal values.
 607     internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
 608     if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
 609     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
 610     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
 611     if (internal_delta_q[i] != 0) {
 612       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
 613       vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
 614     }
 615     if (delta_lf[i] != 0) {
 616       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
 617       vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
 618     }
 619     if (skip[i] != 0) {
 620       vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
 621       vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
 622     }
 623     if (ref_frame[i] >= 0) {
 624       int valid_ref = 1;
 625       // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
 626       if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
 627         valid_ref = 0;
 628       // If GOLDEN is selected, make sure it's set as reference.
 629       if (ref_frame[i] == GOLDEN_FRAME &&
 630           !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
 631         valid_ref = 0;
 632       }
 633       // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
 634       // same reference.
 635       if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
 636         ref_frame[i] = LAST_FRAME;
 637       if (valid_ref) {
 638         vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
 639         vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
 640       }
 641     }
 642   }
 643   roi->enabled = 1;
 644 }
 645
 646 static void init_level_info(Vp9LevelInfo *level_info) {
 647   Vp9LevelStats *const level_stats = &level_info->level_stats;
 648   Vp9LevelSpec *const level_spec = &level_info->level_spec;
 649
 650   memset(level_stats, 0, sizeof(*level_stats));
 651   memset(level_spec, 0, sizeof(*level_spec));
 652   level_spec->level = LEVEL_UNKNOWN;
 653   level_spec->min_altref_distance = INT_MAX;
 654 }
 655
 656 static int check_seg_range(int seg_data[8], int range) {
 657   return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
 658            abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
 659            abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
 660            abs(seg_data[6]) > range || abs(seg_data[7]) > range);
 661 }
 662
 663 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
 664   int i;
 665   const Vp9LevelSpec *this_level;
 666
 667   vpx_clear_system_state();
 668
 669   for (i = 0; i < VP9_LEVELS; ++i) {
 670     this_level = &vp9_level_defs[i];
 671     if ((double)level_spec->max_luma_sample_rate >
 672             (double)this_level->max_luma_sample_rate *
 673                 (1 + SAMPLE_RATE_GRACE_P) ||
 674         level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
 675         level_spec->max_luma_picture_breadth >
 676             this_level->max_luma_picture_breadth ||
 677         level_spec->average_bitrate > this_level->average_bitrate ||
 678         level_spec->max_cpb_size > this_level->max_cpb_size ||
 679         level_spec->compression_ratio < this_level->compression_ratio ||
 680         level_spec->max_col_tiles > this_level->max_col_tiles ||
 681         level_spec->min_altref_distance < this_level->min_altref_distance ||
 682         level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
 683       continue;
 684     break;
 685   }
 686   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 687 }
 688
 689 int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
 690                     unsigned int cols, int delta_q[8], int delta_lf[8],
 691                     int skip[8], int ref_frame[8]) {
 692   VP9_COMMON *cm = &cpi->common;
 693   vpx_roi_map_t *roi = &cpi->roi;
 694   const int range = 63;
 695   const int ref_frame_range = 3;  // Alt-ref
 696   const int skip_range = 1;
 697   const int frame_rows = cpi->common.mi_rows;
 698   const int frame_cols = cpi->common.mi_cols;
 699
 700   // Check number of rows and columns match
 701   if (frame_rows != (int)rows || frame_cols != (int)cols) {
 702     return -1;
 703   }
 704
 705   if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
 706       !check_seg_range(ref_frame, ref_frame_range) ||
 707       !check_seg_range(skip, skip_range))
 708     return -1;
 709
 710   // Also disable segmentation if no deltas are specified.
 711   if (!map ||
 712       (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
 713          delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
 714          delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
 715          delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
 716          skip[5] | skip[6] | skip[7]) &&
 717        (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
 718         ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
 719         ref_frame[6] == -1 && ref_frame[7] == -1))) {
 720     vp9_disable_segmentation(&cm->seg);
 721     cpi->roi.enabled = 0;
 722     return 0;
 723   }
 724
 725   if (roi->roi_map) {
 726     vpx_free(roi->roi_map);
 727     roi->roi_map = NULL;
 728   }
 729   CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
 730
 731   // Copy to ROI structure in the compressor.
 732   memcpy(roi->roi_map, map, rows * cols);
 733   memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
 734   memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
 735   memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
 736   memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
 737   roi->enabled = 1;
 738   roi->rows = rows;
 739   roi->cols = cols;
 740
 741   return 0;
 742 }
 743
 744 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 745                        int cols) {
 746   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
 747     unsigned char *const active_map_8x8 = cpi->active_map.map;
 748     const int mi_rows = cpi->common.mi_rows;
 749     const int mi_cols = cpi->common.mi_cols;
 750     cpi->active_map.update = 1;
 751     if (new_map_16x16) {
 752       int r, c;
 753       for (r = 0; r < mi_rows; ++r) {
 754         for (c = 0; c < mi_cols; ++c) {
 755           active_map_8x8[r * mi_cols + c] =
 756               new_map_16x16[(r >> 1) * cols + (c >> 1)]
 757                   ? AM_SEGMENT_ID_ACTIVE
 758                   : AM_SEGMENT_ID_INACTIVE;
 759         }
 760       }
 761       cpi->active_map.enabled = 1;
 762     } else {
 763       cpi->active_map.enabled = 0;
 764     }
 765     return 0;
 766   } else {
 767     return -1;
 768   }
 769 }
 770
 771 int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 772                        int cols) {
 773   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
 774       new_map_16x16) {
 775     unsigned char *const seg_map_8x8 = cpi->segmentation_map;
 776     const int mi_rows = cpi->common.mi_rows;
 777     const int mi_cols = cpi->common.mi_cols;
 778     memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
 779     if (cpi->active_map.enabled) {
 780       int r, c;
 781       for (r = 0; r < mi_rows; ++r) {
 782         for (c = 0; c < mi_cols; ++c) {
 783           // Cyclic refresh segments are considered active despite not having
 784           // AM_SEGMENT_ID_ACTIVE
 785           new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
 786               seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
 787         }
 788       }
 789     }
 790     return 0;
 791   } else {
 792     return -1;
 793   }
 794 }
 795
 796 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
 797   MACROBLOCK *const mb = &cpi->td.mb;
 798   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
 799   if (cpi->common.allow_high_precision_mv) {
 800     mb->mvcost = mb->nmvcost_hp;
 801     mb->mvsadcost = mb->nmvsadcost_hp;
 802   } else {
 803     mb->mvcost = mb->nmvcost;
 804     mb->mvsadcost = mb->nmvsadcost;
 805   }
 806 }
 807
 808 static void setup_frame(VP9_COMP *cpi) {
 809   VP9_COMMON *const cm = &cpi->common;
 810   // Set up entropy context depending on frame type. The decoder mandates
 811   // the use of the default context, index 0, for keyframes and inter
 812   // frames where the error_resilient_mode or intra_only flag is set. For
 813   // other inter-frames the encoder currently uses only two contexts;
 814   // context 1 for ALTREF frames and context 0 for the others.
 815   if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
 816     vp9_setup_past_independence(cm);
 817   } else {
 818     if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
 819   }
 820
 821   // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF
 822   // case. Need some further investigation on if we could apply this to single
 823   // layer ARF case as well.
 824   if (cpi->multi_layer_arf && !cpi->use_svc) {
 825     GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 826     const int gf_group_index = gf_group->index;
 827     const int boost_frame =
 828         !cpi->rc.is_src_frame_alt_ref &&
 829         (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
 830
 831     // frame_context_idx           Frame Type
 832     //        0              Intra only frame, base layer ARF
 833     //        1              ARFs with layer depth = 2,3
 834     //        2              ARFs with layer depth > 3
 835     //        3              Non-boosted frames
 836     if (frame_is_intra_only(cm)) {
 837       cm->frame_context_idx = 0;
 838     } else if (boost_frame) {
 839       if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
 840         cm->frame_context_idx = 0;
 841       else if (gf_group->layer_depth[gf_group_index] <= 3)
 842         cm->frame_context_idx = 1;
 843       else
 844         cm->frame_context_idx = 2;
 845     } else {
 846       cm->frame_context_idx = 3;
 847     }
 848   }
 849
 850   if (cm->frame_type == KEY_FRAME) {
 851     cpi->refresh_golden_frame = 1;
 852     cpi->refresh_alt_ref_frame = 1;
 853     vp9_zero(cpi->interp_filter_selected);
 854   } else {
 855     *cm->fc = cm->frame_contexts[cm->frame_context_idx];
 856     vp9_zero(cpi->interp_filter_selected[0]);
 857   }
 858 }
 859
 860 static void vp9_enc_setup_mi(VP9_COMMON *cm) {
 861   int i;
 862   cm->mi = cm->mip + cm->mi_stride + 1;
 863   memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
 864   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 865   // Clear top border row
 866   memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
 867   // Clear left border column
 868   for (i = 1; i < cm->mi_rows + 1; ++i)
 869     memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
 870
 871   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 872   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 873
 874   memset(cm->mi_grid_base, 0,
 875          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 876 }
 877
 878 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
 879   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
 880   if (!cm->mip) return 1;
 881   cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
 882   if (!cm->prev_mip) return 1;
 883   cm->mi_alloc_size = mi_size;
 884
 885   cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 886   if (!cm->mi_grid_base) return 1;
 887   cm->prev_mi_grid_base =
 888       (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 889   if (!cm->prev_mi_grid_base) return 1;
 890
 891   return 0;
 892 }
 893
 894 static void vp9_enc_free_mi(VP9_COMMON *cm) {
 895   vpx_free(cm->mip);
 896   cm->mip = NULL;
 897   vpx_free(cm->prev_mip);
 898   cm->prev_mip = NULL;
 899   vpx_free(cm->mi_grid_base);
 900   cm->mi_grid_base = NULL;
 901   vpx_free(cm->prev_mi_grid_base);
 902   cm->prev_mi_grid_base = NULL;
 903   cm->mi_alloc_size = 0;
 904 }
 905
 906 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
 907   // Current mip will be the prev_mip for the next frame.
 908   MODE_INFO **temp_base = cm->prev_mi_grid_base;
 909   MODE_INFO *temp = cm->prev_mip;
 910
 911   // Skip update prev_mi frame in show_existing_frame mode.
 912   if (cm->show_existing_frame) return;
 913
 914   cm->prev_mip = cm->mip;
 915   cm->mip = temp;
 916
 917   // Update the upper left visible macroblock ptrs.
 918   cm->mi = cm->mip + cm->mi_stride + 1;
 919   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 920
 921   cm->prev_mi_grid_base = cm->mi_grid_base;
 922   cm->mi_grid_base = temp_base;
 923   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 924   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 925 }
 926
 927 void vp9_initialize_enc(void) {
 928   static volatile int init_done = 0;
 929
 930   if (!init_done) {
 931     vp9_rtcd();
 932     vpx_dsp_rtcd();
 933     vpx_scale_rtcd();
 934     vp9_init_intra_predictors();
 935     vp9_init_me_luts();
 936     vp9_rc_init_minq_luts();
 937     vp9_entropy_mv_init();
 938 #if !CONFIG_REALTIME_ONLY
 939     vp9_temporal_filter_init();
 940 #endif
 941     init_done = 1;
 942   }
 943 }
 944
 945 static void dealloc_compressor_data(VP9_COMP *cpi) {
 946   VP9_COMMON *const cm = &cpi->common;
 947   int i;
 948
 949   vpx_free(cpi->mbmi_ext_base);
 950   cpi->mbmi_ext_base = NULL;
 951
 952   vpx_free(cpi->tile_data);
 953   cpi->tile_data = NULL;
 954
 955   vpx_free(cpi->segmentation_map);
 956   cpi->segmentation_map = NULL;
 957   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
 958   cpi->coding_context.last_frame_seg_map_copy = NULL;
 959
 960   vpx_free(cpi->nmvcosts[0]);
 961   vpx_free(cpi->nmvcosts[1]);
 962   cpi->nmvcosts[0] = NULL;
 963   cpi->nmvcosts[1] = NULL;
 964
 965   vpx_free(cpi->nmvcosts_hp[0]);
 966   vpx_free(cpi->nmvcosts_hp[1]);
 967   cpi->nmvcosts_hp[0] = NULL;
 968   cpi->nmvcosts_hp[1] = NULL;
 969
 970   vpx_free(cpi->nmvsadcosts[0]);
 971   vpx_free(cpi->nmvsadcosts[1]);
 972   cpi->nmvsadcosts[0] = NULL;
 973   cpi->nmvsadcosts[1] = NULL;
 974
 975   vpx_free(cpi->nmvsadcosts_hp[0]);
 976   vpx_free(cpi->nmvsadcosts_hp[1]);
 977   cpi->nmvsadcosts_hp[0] = NULL;
 978   cpi->nmvsadcosts_hp[1] = NULL;
 979
 980   vpx_free(cpi->skin_map);
 981   cpi->skin_map = NULL;
 982
 983   vpx_free(cpi->prev_partition);
 984   cpi->prev_partition = NULL;
 985
 986   vpx_free(cpi->svc.prev_partition_svc);
 987   cpi->svc.prev_partition_svc = NULL;
 988
 989   vpx_free(cpi->prev_segment_id);
 990   cpi->prev_segment_id = NULL;
 991
 992   vpx_free(cpi->prev_variance_low);
 993   cpi->prev_variance_low = NULL;
 994
 995   vpx_free(cpi->copied_frame_cnt);
 996   cpi->copied_frame_cnt = NULL;
 997
 998   vpx_free(cpi->content_state_sb_fd);
 999   cpi->content_state_sb_fd = NULL;
1000
1001   vpx_free(cpi->count_arf_frame_usage);
1002   cpi->count_arf_frame_usage = NULL;
1003   vpx_free(cpi->count_lastgolden_frame_usage);
1004   cpi->count_lastgolden_frame_usage = NULL;
1005
1006   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1007   cpi->cyclic_refresh = NULL;
1008
1009   vpx_free(cpi->active_map.map);
1010   cpi->active_map.map = NULL;
1011
1012   vpx_free(cpi->roi.roi_map);
1013   cpi->roi.roi_map = NULL;
1014
1015   vpx_free(cpi->consec_zero_mv);
1016   cpi->consec_zero_mv = NULL;
1017
1018   vpx_free(cpi->mb_wiener_variance);
1019   cpi->mb_wiener_variance = NULL;
1020
1021   vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
1022   cpi->mi_ssim_rdmult_scaling_factors = NULL;
1023
1024 #if CONFIG_RATE_CTRL
1025   free_partition_info(cpi);
1026   free_motion_vector_info(cpi);
1027 #endif
1028
1029   vp9_free_ref_frame_buffers(cm->buffer_pool);
1030 #if CONFIG_VP9_POSTPROC
1031   vp9_free_postproc_buffers(cm);
1032 #endif
1033   vp9_free_context_buffers(cm);
1034
1035   vpx_free_frame_buffer(&cpi->last_frame_uf);
1036   vpx_free_frame_buffer(&cpi->scaled_source);
1037   vpx_free_frame_buffer(&cpi->scaled_last_source);
1038   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
1039 #ifdef ENABLE_KF_DENOISE
1040   vpx_free_frame_buffer(&cpi->raw_unscaled_source);
1041   vpx_free_frame_buffer(&cpi->raw_scaled_source);
1042 #endif
1043
1044   vp9_lookahead_destroy(cpi->lookahead);
1045
1046   vpx_free(cpi->tile_tok[0][0]);
1047   cpi->tile_tok[0][0] = 0;
1048
1049   vpx_free(cpi->tplist[0][0]);
1050   cpi->tplist[0][0] = NULL;
1051
1052   vp9_free_pc_tree(&cpi->td);
1053
1054   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
1055     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
1056     vpx_free(lc->rc_twopass_stats_in.buf);
1057     lc->rc_twopass_stats_in.buf = NULL;
1058     lc->rc_twopass_stats_in.sz = 0;
1059   }
1060
1061   if (cpi->source_diff_var != NULL) {
1062     vpx_free(cpi->source_diff_var);
1063     cpi->source_diff_var = NULL;
1064   }
1065
1066   for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
1067     vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
1068   }
1069   memset(&cpi->svc.scaled_frames[0], 0,
1070          MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
1071
1072   vpx_free_frame_buffer(&cpi->svc.scaled_temp);
1073   memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
1074
1075   vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
1076   memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
1077
1078   vp9_free_svc_cyclic_refresh(cpi);
1079 }
1080
1081 static void save_coding_context(VP9_COMP *cpi) {
1082   CODING_CONTEXT *const cc = &cpi->coding_context;
1083   VP9_COMMON *cm = &cpi->common;
1084
1085   // Stores a snapshot of key state variables which can subsequently be
1086   // restored with a call to vp9_restore_coding_context. These functions are
1087   // intended for use in a re-code loop in vp9_compress_frame where the
1088   // quantizer value is adjusted between loop iterations.
1089   vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
1090
1091   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
1092          MV_VALS * sizeof(*cpi->nmvcosts[0]));
1093   memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
1094          MV_VALS * sizeof(*cpi->nmvcosts[1]));
1095   memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
1096          MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
1097   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
1098          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
1099
1100   vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
1101
1102   memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
1103          (cm->mi_rows * cm->mi_cols));
1104
1105   vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
1106   vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
1107
1108   cc->fc = *cm->fc;
1109 }
1110
1111 static void restore_coding_context(VP9_COMP *cpi) {
1112   CODING_CONTEXT *const cc = &cpi->coding_context;
1113   VP9_COMMON *cm = &cpi->common;
1114
1115   // Restore key state variables to the snapshot state stored in the
1116   // previous call to vp9_save_coding_context.
1117   vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
1118
1119   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
1120   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
1121   memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
1122          MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
1123   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
1124          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
1125
1126   vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
1127
1128   memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
1129          (cm->mi_rows * cm->mi_cols));
1130
1131   vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
1132   vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
1133
1134   *cm->fc = cc->fc;
1135 }
1136
1137 #if !CONFIG_REALTIME_ONLY
1138 static void configure_static_seg_features(VP9_COMP *cpi) {
1139   VP9_COMMON *const cm = &cpi->common;
1140   const RATE_CONTROL *const rc = &cpi->rc;
1141   struct segmentation *const seg = &cm->seg;
1142
1143   int high_q = (int)(rc->avg_q > 48.0);
1144   int qi_delta;
1145
1146   // Disable and clear down for KF
1147   if (cm->frame_type == KEY_FRAME) {
1148     // Clear down the global segmentation map
1149     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1150     seg->update_map = 0;
1151     seg->update_data = 0;
1152     cpi->static_mb_pct = 0;
1153
1154     // Disable segmentation
1155     vp9_disable_segmentation(seg);
1156
1157     // Clear down the segment features.
1158     vp9_clearall_segfeatures(seg);
1159   } else if (cpi->refresh_alt_ref_frame) {
1160     // If this is an alt ref frame
1161     // Clear down the global segmentation map
1162     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1163     seg->update_map = 0;
1164     seg->update_data = 0;
1165     cpi->static_mb_pct = 0;
1166
1167     // Disable segmentation and individual segment features by default
1168     vp9_disable_segmentation(seg);
1169     vp9_clearall_segfeatures(seg);
1170
1171     // Scan frames from current to arf frame.
1172     // This function re-enables segmentation if appropriate.
1173     vp9_update_mbgraph_stats(cpi);
1174
1175     // If segmentation was enabled set those features needed for the
1176     // arf itself.
1177     if (seg->enabled) {
1178       seg->update_map = 1;
1179       seg->update_data = 1;
1180
1181       qi_delta =
1182           vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
1183       vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
1184       vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1185
1186       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1187       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1188
1189       // Where relevant assume segment data is delta data
1190       seg->abs_delta = SEGMENT_DELTADATA;
1191     }
1192   } else if (seg->enabled) {
1193     // All other frames if segmentation has been enabled
1194
1195     // First normal frame in a valid gf or alt ref group
1196     if (rc->frames_since_golden == 0) {
1197       // Set up segment features for normal frames in an arf group
1198       if (rc->source_alt_ref_active) {
1199         seg->update_map = 0;
1200         seg->update_data = 1;
1201         seg->abs_delta = SEGMENT_DELTADATA;
1202
1203         qi_delta =
1204             vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth);
1205         vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
1206         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1207
1208         vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1209         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1210
1211         // Segment coding disabled for compred testing
1212         if (high_q || (cpi->static_mb_pct == 100)) {
1213           vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1214           vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1215           vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1216         }
1217       } else {
1218         // Disable segmentation and clear down features if alt ref
1219         // is not active for this group
1220
1221         vp9_disable_segmentation(seg);
1222
1223         memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1224
1225         seg->update_map = 0;
1226         seg->update_data = 0;
1227
1228         vp9_clearall_segfeatures(seg);
1229       }
1230     } else if (rc->is_src_frame_alt_ref) {
1231       // Special case where we are coding over the top of a previous
1232       // alt ref frame.
1233       // Segment coding disabled for compred testing
1234
1235       // Enable ref frame features for segment 0 as well
1236       vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
1237       vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1238
1239       // All mbs should use ALTREF_FRAME
1240       vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
1241       vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1242       vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
1243       vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1244
1245       // Skip all MBs if high Q (0,0 mv and skip coeffs)
1246       if (high_q) {
1247         vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
1248         vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1249       }
1250       // Enable data update
1251       seg->update_data = 1;
1252     } else {
1253       // All other frames.
1254
1255       // No updates.. leave things as they are.
1256       seg->update_map = 0;
1257       seg->update_data = 0;
1258     }
1259   }
1260 }
1261 #endif  // !CONFIG_REALTIME_ONLY
1262
1263 static void update_reference_segmentation_map(VP9_COMP *cpi) {
1264   VP9_COMMON *const cm = &cpi->common;
1265   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
1266   uint8_t *cache_ptr = cm->last_frame_seg_map;
1267   int row, col;
1268
1269   for (row = 0; row < cm->mi_rows; row++) {
1270     MODE_INFO **mi_8x8 = mi_8x8_ptr;
1271     uint8_t *cache = cache_ptr;
1272     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
1273       cache[0] = mi_8x8[0]->segment_id;
1274     mi_8x8_ptr += cm->mi_stride;
1275     cache_ptr += cm->mi_cols;
1276   }
1277 }
1278
1279 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
1280   VP9_COMMON *cm = &cpi->common;
1281   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1282
1283   if (!cpi->lookahead)
1284     cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
1285                                         cm->subsampling_x, cm->subsampling_y,
1286 #if CONFIG_VP9_HIGHBITDEPTH
1287                                         cm->use_highbitdepth,
1288 #endif
1289                                         oxcf->lag_in_frames);
1290   if (!cpi->lookahead)
1291     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1292                        "Failed to allocate lag buffers");
1293
1294   // TODO(agrange) Check if ARF is enabled and skip allocation if not.
1295   if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
1296                                cm->subsampling_x, cm->subsampling_y,
1297 #if CONFIG_VP9_HIGHBITDEPTH
1298                                cm->use_highbitdepth,
1299 #endif
1300                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1301                                NULL, NULL, NULL))
1302     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1303                        "Failed to allocate altref buffer");
1304 }
1305
1306 static void alloc_util_frame_buffers(VP9_COMP *cpi) {
1307   VP9_COMMON *const cm = &cpi->common;
1308   if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
1309                                cm->subsampling_x, cm->subsampling_y,
1310 #if CONFIG_VP9_HIGHBITDEPTH
1311                                cm->use_highbitdepth,
1312 #endif
1313                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1314                                NULL, NULL, NULL))
1315     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1316                        "Failed to allocate last frame buffer");
1317
1318   if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
1319                                cm->subsampling_x, cm->subsampling_y,
1320 #if CONFIG_VP9_HIGHBITDEPTH
1321                                cm->use_highbitdepth,
1322 #endif
1323                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1324                                NULL, NULL, NULL))
1325     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1326                        "Failed to allocate scaled source buffer");
1327
1328   // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
1329   // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
1330   // target of 1/4x1/4. number_spatial_layers must be greater than 2.
1331   if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
1332       cpi->svc.number_spatial_layers > 2) {
1333     cpi->svc.scaled_temp_is_alloc = 1;
1334     if (vpx_realloc_frame_buffer(
1335             &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
1336             cm->subsampling_x, cm->subsampling_y,
1337 #if CONFIG_VP9_HIGHBITDEPTH
1338             cm->use_highbitdepth,
1339 #endif
1340             VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
1341       vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
1342                          "Failed to allocate scaled_frame for svc ");
1343   }
1344
1345   if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
1346                                cm->subsampling_x, cm->subsampling_y,
1347 #if CONFIG_VP9_HIGHBITDEPTH
1348                                cm->use_highbitdepth,
1349 #endif
1350                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1351                                NULL, NULL, NULL))
1352     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1353                        "Failed to allocate scaled last source buffer");
1354 #ifdef ENABLE_KF_DENOISE
1355   if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height,
1356                                cm->subsampling_x, cm->subsampling_y,
1357 #if CONFIG_VP9_HIGHBITDEPTH
1358                                cm->use_highbitdepth,
1359 #endif
1360                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1361                                NULL, NULL, NULL))
1362     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1363                        "Failed to allocate unscaled raw source frame buffer");
1364
1365   if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height,
1366                                cm->subsampling_x, cm->subsampling_y,
1367 #if CONFIG_VP9_HIGHBITDEPTH
1368                                cm->use_highbitdepth,
1369 #endif
1370                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1371                                NULL, NULL, NULL))
1372     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1373                        "Failed to allocate scaled raw source frame buffer");
1374 #endif
1375 }
1376
1377 static int alloc_context_buffers_ext(VP9_COMP *cpi) {
1378   VP9_COMMON *cm = &cpi->common;
1379   int mi_size = cm->mi_cols * cm->mi_rows;
1380
1381   cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
1382   if (!cpi->mbmi_ext_base) return 1;
1383
1384   return 0;
1385 }
1386
1387 static void alloc_compressor_data(VP9_COMP *cpi) {
1388   VP9_COMMON *cm = &cpi->common;
1389   int sb_rows;
1390
1391   vp9_alloc_context_buffers(cm, cm->width, cm->height);
1392
1393   alloc_context_buffers_ext(cpi);
1394
1395   vpx_free(cpi->tile_tok[0][0]);
1396
1397   {
1398     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
1399     CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
1400                     vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
1401   }
1402
1403   sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
1404   vpx_free(cpi->tplist[0][0]);
1405   CHECK_MEM_ERROR(
1406       cm, cpi->tplist[0][0],
1407       vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
1408
1409   vp9_setup_pc_tree(&cpi->common, &cpi->td);
1410 }
1411
1412 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
1413   cpi->framerate = framerate < 0.1 ? 30 : framerate;
1414   vp9_rc_update_framerate(cpi);
1415 }
1416
1417 static void set_tile_limits(VP9_COMP *cpi) {
1418   VP9_COMMON *const cm = &cpi->common;
1419
1420   int min_log2_tile_cols, max_log2_tile_cols;
1421   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
1422
1423   cm->log2_tile_cols =
1424       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
1425   cm->log2_tile_rows = cpi->oxcf.tile_rows;
1426
1427   if (cpi->oxcf.target_level == LEVEL_AUTO) {
1428     const int level_tile_cols =
1429         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
1430     if (cm->log2_tile_cols > level_tile_cols) {
1431       cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
1432     }
1433   }
1434 }
1435
1436 static void update_frame_size(VP9_COMP *cpi) {
1437   VP9_COMMON *const cm = &cpi->common;
1438   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
1439
1440   vp9_set_mb_mi(cm, cm->width, cm->height);
1441   vp9_init_context_buffers(cm);
1442   vp9_init_macroblockd(cm, xd, NULL);
1443   cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
1444   memset(cpi->mbmi_ext_base, 0,
1445          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
1446
1447   set_tile_limits(cpi);
1448 }
1449
1450 static void init_buffer_indices(VP9_COMP *cpi) {
1451   int ref_frame;
1452
1453   for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1454     cpi->ref_fb_idx[ref_frame] = ref_frame;
1455
1456   cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
1457   cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
1458   cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
1459 }
1460
1461 static void init_level_constraint(LevelConstraint *lc) {
1462   lc->level_index = -1;
1463   lc->max_cpb_size = INT_MAX;
1464   lc->max_frame_size = INT_MAX;
1465   lc->fail_flag = 0;
1466 }
1467
1468 static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
1469   vpx_clear_system_state();
1470   ls->level_index = level_index;
1471   if (level_index >= 0) {
1472     ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000;
1473   }
1474 }
1475
1476 static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1477   VP9_COMMON *const cm = &cpi->common;
1478
1479   cpi->oxcf = *oxcf;
1480   cpi->framerate = oxcf->init_framerate;
1481   cm->profile = oxcf->profile;
1482   cm->bit_depth = oxcf->bit_depth;
1483 #if CONFIG_VP9_HIGHBITDEPTH
1484   cm->use_highbitdepth = oxcf->use_highbitdepth;
1485 #endif
1486   cm->color_space = oxcf->color_space;
1487   cm->color_range = oxcf->color_range;
1488
1489   cpi->target_level = oxcf->target_level;
1490   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1491   set_level_constraint(&cpi->level_constraint,
1492                        get_level_index(cpi->target_level));
1493
1494   cm->width = oxcf->width;
1495   cm->height = oxcf->height;
1496   alloc_compressor_data(cpi);
1497
1498   cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
1499
1500   // Single thread case: use counts in common.
1501   cpi->td.counts = &cm->counts;
1502
1503   // Spatial scalability.
1504   cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
1505   // Temporal scalability.
1506   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
1507
1508   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
1509       ((cpi->svc.number_temporal_layers > 1 ||
1510         cpi->svc.number_spatial_layers > 1) &&
1511        cpi->oxcf.pass != 1)) {
1512     vp9_init_layer_context(cpi);
1513   }
1514
1515   // change includes all joint functionality
1516   vp9_change_config(cpi, oxcf);
1517
1518   cpi->static_mb_pct = 0;
1519   cpi->ref_frame_flags = 0;
1520
1521   init_buffer_indices(cpi);
1522
1523   vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
1524 }
1525
1526 static void set_rc_buffer_sizes(RATE_CONTROL *rc,
1527                                 const VP9EncoderConfig *oxcf) {
1528   const int64_t bandwidth = oxcf->target_bandwidth;
1529   const int64_t starting = oxcf->starting_buffer_level_ms;
1530   const int64_t optimal = oxcf->optimal_buffer_level_ms;
1531   const int64_t maximum = oxcf->maximum_buffer_size_ms;
1532
1533   rc->starting_buffer_level = starting * bandwidth / 1000;
1534   rc->optimal_buffer_level =
1535       (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
1536   rc->maximum_buffer_size =
1537       (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
1538 }
1539
1540 #if CONFIG_VP9_HIGHBITDEPTH
1541 // TODO(angiebird): make sdx8f available for highbitdepth if needed
1542 #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
1543   cpi->fn_ptr[BT].sdf = SDF;                             \
1544   cpi->fn_ptr[BT].sdaf = SDAF;                           \
1545   cpi->fn_ptr[BT].vf = VF;                               \
1546   cpi->fn_ptr[BT].svf = SVF;                             \
1547   cpi->fn_ptr[BT].svaf = SVAF;                           \
1548   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
1549   cpi->fn_ptr[BT].sdx8f = NULL;
1550
1551 #define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
1552   static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
1553                                      int source_stride,                        \
1554                                      const uint8_t *ref_ptr, int ref_stride) { \
1555     return fnname(src_ptr, source_stride, ref_ptr, ref_stride);                \
1556   }                                                                            \
1557   static unsigned int fnname##_bits10(                                         \
1558       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1559       int ref_stride) {                                                        \
1560     return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;           \
1561   }                                                                            \
1562   static unsigned int fnname##_bits12(                                         \
1563       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1564       int ref_stride) {                                                        \
1565     return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;           \
1566   }
1567
1568 #define MAKE_BFP_SADAVG_WRAPPER(fnname)                                        \
1569   static unsigned int fnname##_bits8(                                          \
1570       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1571       int ref_stride, const uint8_t *second_pred) {                            \
1572     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred);   \
1573   }                                                                            \
1574   static unsigned int fnname##_bits10(                                         \
1575       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1576       int ref_stride, const uint8_t *second_pred) {                            \
1577     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
1578            2;                                                                  \
1579   }                                                                            \
1580   static unsigned int fnname##_bits12(                                         \
1581       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1582       int ref_stride, const uint8_t *second_pred) {                            \
1583     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
1584            4;                                                                  \
1585   }
1586
1587 #define MAKE_BFP_SAD4D_WRAPPER(fnname)                                        \
1588   static void fnname##_bits8(const uint8_t *src_ptr, int source_stride,       \
1589                              const uint8_t *const ref_ptr[], int ref_stride,  \
1590                              unsigned int *sad_array) {                       \
1591     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1592   }                                                                           \
1593   static void fnname##_bits10(const uint8_t *src_ptr, int source_stride,      \
1594                               const uint8_t *const ref_ptr[], int ref_stride, \
1595                               unsigned int *sad_array) {                      \
1596     int i;                                                                    \
1597     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1598     for (i = 0; i < 4; i++) sad_array[i] >>= 2;                               \
1599   }                                                                           \
1600   static void fnname##_bits12(const uint8_t *src_ptr, int source_stride,      \
1601                               const uint8_t *const ref_ptr[], int ref_stride, \
1602                               unsigned int *sad_array) {                      \
1603     int i;                                                                    \
1604     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1605     for (i = 0; i < 4; i++) sad_array[i] >>= 4;                               \
1606   }
1607
1608 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
1609 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
1610 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
1611 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
1612 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
1613 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
1614 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
1615 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
1616 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
1617 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
1618 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
1619 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
1620 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
1621 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
1622 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
1623 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
1624 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
1625 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
1626 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
1627 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
1628 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
1629 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
1630 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
1631 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
1632 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
1633 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
1634 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
1635 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
1636 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
1637 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
1638 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
1639 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
1640 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
1641 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
1642 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
1643 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
1644 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
1645 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
1646 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
1647
1648 static void highbd_set_var_fns(VP9_COMP *const cpi) {
1649   VP9_COMMON *const cm = &cpi->common;
1650   if (cm->use_highbitdepth) {
1651     switch (cm->bit_depth) {
1652       case VPX_BITS_8:
1653         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
1654                    vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
1655                    vpx_highbd_8_sub_pixel_variance32x16,
1656                    vpx_highbd_8_sub_pixel_avg_variance32x16,
1657                    vpx_highbd_sad32x16x4d_bits8)
1658
1659         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
1660                    vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
1661                    vpx_highbd_8_sub_pixel_variance16x32,
1662                    vpx_highbd_8_sub_pixel_avg_variance16x32,
1663                    vpx_highbd_sad16x32x4d_bits8)
1664
1665         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
1666                    vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
1667                    vpx_highbd_8_sub_pixel_variance64x32,
1668                    vpx_highbd_8_sub_pixel_avg_variance64x32,
1669                    vpx_highbd_sad64x32x4d_bits8)
1670
1671         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
1672                    vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
1673                    vpx_highbd_8_sub_pixel_variance32x64,
1674                    vpx_highbd_8_sub_pixel_avg_variance32x64,
1675                    vpx_highbd_sad32x64x4d_bits8)
1676
1677         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
1678                    vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
1679                    vpx_highbd_8_sub_pixel_variance32x32,
1680                    vpx_highbd_8_sub_pixel_avg_variance32x32,
1681                    vpx_highbd_sad32x32x4d_bits8)
1682
1683         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
1684                    vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
1685                    vpx_highbd_8_sub_pixel_variance64x64,
1686                    vpx_highbd_8_sub_pixel_avg_variance64x64,
1687                    vpx_highbd_sad64x64x4d_bits8)
1688
1689         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
1690                    vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
1691                    vpx_highbd_8_sub_pixel_variance16x16,
1692                    vpx_highbd_8_sub_pixel_avg_variance16x16,
1693                    vpx_highbd_sad16x16x4d_bits8)
1694
1695         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
1696                    vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
1697                    vpx_highbd_8_sub_pixel_variance16x8,
1698                    vpx_highbd_8_sub_pixel_avg_variance16x8,
1699                    vpx_highbd_sad16x8x4d_bits8)
1700
1701         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
1702                    vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
1703                    vpx_highbd_8_sub_pixel_variance8x16,
1704                    vpx_highbd_8_sub_pixel_avg_variance8x16,
1705                    vpx_highbd_sad8x16x4d_bits8)
1706
1707         HIGHBD_BFP(
1708             BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
1709             vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
1710             vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
1711
1712         HIGHBD_BFP(
1713             BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
1714             vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
1715             vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
1716
1717         HIGHBD_BFP(
1718             BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
1719             vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
1720             vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
1721
1722         HIGHBD_BFP(
1723             BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
1724             vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
1725             vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
1726         break;
1727
1728       case VPX_BITS_10:
1729         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
1730                    vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
1731                    vpx_highbd_10_sub_pixel_variance32x16,
1732                    vpx_highbd_10_sub_pixel_avg_variance32x16,
1733                    vpx_highbd_sad32x16x4d_bits10)
1734
1735         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
1736                    vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
1737                    vpx_highbd_10_sub_pixel_variance16x32,
1738                    vpx_highbd_10_sub_pixel_avg_variance16x32,
1739                    vpx_highbd_sad16x32x4d_bits10)
1740
1741         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
1742                    vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
1743                    vpx_highbd_10_sub_pixel_variance64x32,
1744                    vpx_highbd_10_sub_pixel_avg_variance64x32,
1745                    vpx_highbd_sad64x32x4d_bits10)
1746
1747         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
1748                    vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
1749                    vpx_highbd_10_sub_pixel_variance32x64,
1750                    vpx_highbd_10_sub_pixel_avg_variance32x64,
1751                    vpx_highbd_sad32x64x4d_bits10)
1752
1753         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
1754                    vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
1755                    vpx_highbd_10_sub_pixel_variance32x32,
1756                    vpx_highbd_10_sub_pixel_avg_variance32x32,
1757                    vpx_highbd_sad32x32x4d_bits10)
1758
1759         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
1760                    vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
1761                    vpx_highbd_10_sub_pixel_variance64x64,
1762                    vpx_highbd_10_sub_pixel_avg_variance64x64,
1763                    vpx_highbd_sad64x64x4d_bits10)
1764
1765         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
1766                    vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
1767                    vpx_highbd_10_sub_pixel_variance16x16,
1768                    vpx_highbd_10_sub_pixel_avg_variance16x16,
1769                    vpx_highbd_sad16x16x4d_bits10)
1770
1771         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
1772                    vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
1773                    vpx_highbd_10_sub_pixel_variance16x8,
1774                    vpx_highbd_10_sub_pixel_avg_variance16x8,
1775                    vpx_highbd_sad16x8x4d_bits10)
1776
1777         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
1778                    vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
1779                    vpx_highbd_10_sub_pixel_variance8x16,
1780                    vpx_highbd_10_sub_pixel_avg_variance8x16,
1781                    vpx_highbd_sad8x16x4d_bits10)
1782
1783         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
1784                    vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
1785                    vpx_highbd_10_sub_pixel_variance8x8,
1786                    vpx_highbd_10_sub_pixel_avg_variance8x8,
1787                    vpx_highbd_sad8x8x4d_bits10)
1788
1789         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
1790                    vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
1791                    vpx_highbd_10_sub_pixel_variance8x4,
1792                    vpx_highbd_10_sub_pixel_avg_variance8x4,
1793                    vpx_highbd_sad8x4x4d_bits10)
1794
1795         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
1796                    vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
1797                    vpx_highbd_10_sub_pixel_variance4x8,
1798                    vpx_highbd_10_sub_pixel_avg_variance4x8,
1799                    vpx_highbd_sad4x8x4d_bits10)
1800
1801         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
1802                    vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
1803                    vpx_highbd_10_sub_pixel_variance4x4,
1804                    vpx_highbd_10_sub_pixel_avg_variance4x4,
1805                    vpx_highbd_sad4x4x4d_bits10)
1806         break;
1807
1808       default:
1809         assert(cm->bit_depth == VPX_BITS_12);
1810         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
1811                    vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
1812                    vpx_highbd_12_sub_pixel_variance32x16,
1813                    vpx_highbd_12_sub_pixel_avg_variance32x16,
1814                    vpx_highbd_sad32x16x4d_bits12)
1815
1816         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
1817                    vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
1818                    vpx_highbd_12_sub_pixel_variance16x32,
1819                    vpx_highbd_12_sub_pixel_avg_variance16x32,
1820                    vpx_highbd_sad16x32x4d_bits12)
1821
1822         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
1823                    vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
1824                    vpx_highbd_12_sub_pixel_variance64x32,
1825                    vpx_highbd_12_sub_pixel_avg_variance64x32,
1826                    vpx_highbd_sad64x32x4d_bits12)
1827
1828         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
1829                    vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
1830                    vpx_highbd_12_sub_pixel_variance32x64,
1831                    vpx_highbd_12_sub_pixel_avg_variance32x64,
1832                    vpx_highbd_sad32x64x4d_bits12)
1833
1834         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
1835                    vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
1836                    vpx_highbd_12_sub_pixel_variance32x32,
1837                    vpx_highbd_12_sub_pixel_avg_variance32x32,
1838                    vpx_highbd_sad32x32x4d_bits12)
1839
1840         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
1841                    vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
1842                    vpx_highbd_12_sub_pixel_variance64x64,
1843                    vpx_highbd_12_sub_pixel_avg_variance64x64,
1844                    vpx_highbd_sad64x64x4d_bits12)
1845
1846         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
1847                    vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
1848                    vpx_highbd_12_sub_pixel_variance16x16,
1849                    vpx_highbd_12_sub_pixel_avg_variance16x16,
1850                    vpx_highbd_sad16x16x4d_bits12)
1851
1852         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
1853                    vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
1854                    vpx_highbd_12_sub_pixel_variance16x8,
1855                    vpx_highbd_12_sub_pixel_avg_variance16x8,
1856                    vpx_highbd_sad16x8x4d_bits12)
1857
1858         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
1859                    vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
1860                    vpx_highbd_12_sub_pixel_variance8x16,
1861                    vpx_highbd_12_sub_pixel_avg_variance8x16,
1862                    vpx_highbd_sad8x16x4d_bits12)
1863
1864         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
1865                    vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
1866                    vpx_highbd_12_sub_pixel_variance8x8,
1867                    vpx_highbd_12_sub_pixel_avg_variance8x8,
1868                    vpx_highbd_sad8x8x4d_bits12)
1869
1870         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
1871                    vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
1872                    vpx_highbd_12_sub_pixel_variance8x4,
1873                    vpx_highbd_12_sub_pixel_avg_variance8x4,
1874                    vpx_highbd_sad8x4x4d_bits12)
1875
1876         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
1877                    vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
1878                    vpx_highbd_12_sub_pixel_variance4x8,
1879                    vpx_highbd_12_sub_pixel_avg_variance4x8,
1880                    vpx_highbd_sad4x8x4d_bits12)
1881
1882         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
1883                    vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
1884                    vpx_highbd_12_sub_pixel_variance4x4,
1885                    vpx_highbd_12_sub_pixel_avg_variance4x4,
1886                    vpx_highbd_sad4x4x4d_bits12)
1887         break;
1888     }
1889   }
1890 }
1891 #endif  // CONFIG_VP9_HIGHBITDEPTH
1892
1893 static void realloc_segmentation_maps(VP9_COMP *cpi) {
1894   VP9_COMMON *const cm = &cpi->common;
1895
1896   // Create the encoder segmentation map and set all entries to 0
1897   vpx_free(cpi->segmentation_map);
1898   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
1899                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1900
1901   // Create a map used for cyclic background refresh.
1902   if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1903   CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
1904                   vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
1905
1906   // Create a map used to mark inactive areas.
1907   vpx_free(cpi->active_map.map);
1908   CHECK_MEM_ERROR(cm, cpi->active_map.map,
1909                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1910
1911   // And a place holder structure is the coding context
1912   // for use if we want to save and restore it
1913   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
1914   CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
1915                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1916 }
1917
1918 static void alloc_copy_partition_data(VP9_COMP *cpi) {
1919   VP9_COMMON *const cm = &cpi->common;
1920   if (cpi->prev_partition == NULL) {
1921     CHECK_MEM_ERROR(cm, cpi->prev_partition,
1922                     (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
1923                                              sizeof(*cpi->prev_partition)));
1924   }
1925   if (cpi->prev_segment_id == NULL) {
1926     CHECK_MEM_ERROR(
1927         cm, cpi->prev_segment_id,
1928         (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1929                              sizeof(*cpi->prev_segment_id)));
1930   }
1931   if (cpi->prev_variance_low == NULL) {
1932     CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
1933                     (uint8_t *)vpx_calloc(
1934                         (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
1935                         sizeof(*cpi->prev_variance_low)));
1936   }
1937   if (cpi->copied_frame_cnt == NULL) {
1938     CHECK_MEM_ERROR(
1939         cm, cpi->copied_frame_cnt,
1940         (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1941                               sizeof(*cpi->copied_frame_cnt)));
1942   }
1943 }
1944
1945 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1946   VP9_COMMON *const cm = &cpi->common;
1947   RATE_CONTROL *const rc = &cpi->rc;
1948   int last_w = cpi->oxcf.width;
1949   int last_h = cpi->oxcf.height;
1950
1951   vp9_init_quantizer(cpi);
1952   if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
1953   cm->bit_depth = oxcf->bit_depth;
1954   cm->color_space = oxcf->color_space;
1955   cm->color_range = oxcf->color_range;
1956
1957   cpi->target_level = oxcf->target_level;
1958   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1959   set_level_constraint(&cpi->level_constraint,
1960                        get_level_index(cpi->target_level));
1961
1962   if (cm->profile <= PROFILE_1)
1963     assert(cm->bit_depth == VPX_BITS_8);
1964   else
1965     assert(cm->bit_depth > VPX_BITS_8);
1966
1967   cpi->oxcf = *oxcf;
1968 #if CONFIG_VP9_HIGHBITDEPTH
1969   cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
1970 #endif  // CONFIG_VP9_HIGHBITDEPTH
1971
1972   if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
1973     rc->baseline_gf_interval = FIXED_GF_INTERVAL;
1974   } else {
1975     rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
1976   }
1977
1978   cpi->refresh_golden_frame = 0;
1979   cpi->refresh_last_frame = 1;
1980   cm->refresh_frame_context = 1;
1981   cm->reset_frame_context = 0;
1982
1983   vp9_reset_segment_features(&cm->seg);
1984   vp9_set_high_precision_mv(cpi, 0);
1985
1986   {
1987     int i;
1988
1989     for (i = 0; i < MAX_SEGMENTS; i++)
1990       cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
1991   }
1992   cpi->encode_breakout = cpi->oxcf.encode_breakout;
1993
1994   set_rc_buffer_sizes(rc, &cpi->oxcf);
1995
1996   // Under a configuration change, where maximum_buffer_size may change,
1997   // keep buffer level clipped to the maximum allowed buffer size.
1998   rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
1999   rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
2000
2001   // Set up frame rate and related parameters rate control values.
2002   vp9_new_framerate(cpi, cpi->framerate);
2003
2004   // Set absolute upper and lower quality limits
2005   rc->worst_quality = cpi->oxcf.worst_allowed_q;
2006   rc->best_quality = cpi->oxcf.best_allowed_q;
2007
2008   cm->interp_filter = cpi->sf.default_interp_filter;
2009
2010   if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
2011     cm->render_width = cpi->oxcf.render_width;
2012     cm->render_height = cpi->oxcf.render_height;
2013   } else {
2014     cm->render_width = cpi->oxcf.width;
2015     cm->render_height = cpi->oxcf.height;
2016   }
2017   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2018     cm->width = cpi->oxcf.width;
2019     cm->height = cpi->oxcf.height;
2020     cpi->external_resize = 1;
2021   }
2022
2023   if (cpi->initial_width) {
2024     int new_mi_size = 0;
2025     vp9_set_mb_mi(cm, cm->width, cm->height);
2026     new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
2027     if (cm->mi_alloc_size < new_mi_size) {
2028       vp9_free_context_buffers(cm);
2029       alloc_compressor_data(cpi);
2030       realloc_segmentation_maps(cpi);
2031       cpi->initial_width = cpi->initial_height = 0;
2032       cpi->external_resize = 0;
2033     } else if (cm->mi_alloc_size == new_mi_size &&
2034                (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
2035       vp9_alloc_loop_filter(cm);
2036     }
2037   }
2038
2039   if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width ||
2040       last_h != cpi->oxcf.height)
2041     update_frame_size(cpi);
2042
2043   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2044     memset(cpi->consec_zero_mv, 0,
2045            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
2046     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
2047       vp9_cyclic_refresh_reset_resize(cpi);
2048     rc->rc_1_frame = 0;
2049     rc->rc_2_frame = 0;
2050   }
2051
2052   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
2053       ((cpi->svc.number_temporal_layers > 1 ||
2054         cpi->svc.number_spatial_layers > 1) &&
2055        cpi->oxcf.pass != 1)) {
2056     vp9_update_layer_context_change_config(cpi,
2057                                            (int)cpi->oxcf.target_bandwidth);
2058   }
2059
2060   // Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
2061   // configuration change has a large change in avg_frame_bandwidth.
2062   // For SVC check for resetting based on spatial layer average bandwidth.
2063   // Also reset buffer level to optimal level.
2064   if (cm->current_video_frame > (unsigned int)cpi->svc.number_spatial_layers) {
2065     if (cpi->use_svc) {
2066       vp9_svc_check_reset_layer_rc_flag(cpi);
2067     } else {
2068       if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
2069           rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
2070         rc->rc_1_frame = 0;
2071         rc->rc_2_frame = 0;
2072         rc->bits_off_target = rc->optimal_buffer_level;
2073         rc->buffer_level = rc->optimal_buffer_level;
2074       }
2075     }
2076   }
2077
2078   cpi->alt_ref_source = NULL;
2079   rc->is_src_frame_alt_ref = 0;
2080
2081 #if 0
2082   // Experimental RD Code
2083   cpi->frame_distortion = 0;
2084   cpi->last_frame_distortion = 0;
2085 #endif
2086
2087   set_tile_limits(cpi);
2088
2089   cpi->ext_refresh_frame_flags_pending = 0;
2090   cpi->ext_refresh_frame_context_pending = 0;
2091
2092 #if CONFIG_VP9_HIGHBITDEPTH
2093   highbd_set_var_fns(cpi);
2094 #endif
2095
2096   vp9_set_row_mt(cpi);
2097 }
2098
2099 #ifndef M_LOG2_E
2100 #define M_LOG2_E 0.693147180559945309417
2101 #endif
2102 #define log2f(x) (log(x) / (float)M_LOG2_E)
2103
2104 /***********************************************************************
2105  * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts'    *
2106  ***********************************************************************
2107  * The following 2 functions ('cal_nmvjointsadcost' and                *
2108  * 'cal_nmvsadcosts') are used to calculate cost lookup tables         *
2109  * used by 'vp9_diamond_search_sad'. The C implementation of the       *
2110  * function is generic, but the AVX intrinsics optimised version       *
2111  * relies on the following properties of the computed tables:          *
2112  * For cal_nmvjointsadcost:                                            *
2113  *   - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]     *
2114  * For cal_nmvsadcosts:                                                *
2115  *   - For all i: mvsadcost[0][i] == mvsadcost[1][i]                   *
2116  *         (Equal costs for both components)                           *
2117  *   - For all i: mvsadcost[0][i] == mvsadcost[0][-i]                  *
2118  *         (Cost function is even)                                     *
2119  * If these do not hold, then the AVX optimised version of the         *
2120  * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
2121  * case you can revert to using the C function instead.                *
2122  ***********************************************************************/
2123
2124 static void cal_nmvjointsadcost(int *mvjointsadcost) {
2125   /*********************************************************************
2126    * Warning: Read the comments above before modifying this function   *
2127    *********************************************************************/
2128   mvjointsadcost[0] = 600;
2129   mvjointsadcost[1] = 300;
2130   mvjointsadcost[2] = 300;
2131   mvjointsadcost[3] = 300;
2132 }
2133
2134 static void cal_nmvsadcosts(int *mvsadcost[2]) {
2135   /*********************************************************************
2136    * Warning: Read the comments above before modifying this function   *
2137    *********************************************************************/
2138   int i = 1;
2139
2140   mvsadcost[0][0] = 0;
2141   mvsadcost[1][0] = 0;
2142
2143   do {
2144     double z = 256 * (2 * (log2f(8 * i) + .6));
2145     mvsadcost[0][i] = (int)z;
2146     mvsadcost[1][i] = (int)z;
2147     mvsadcost[0][-i] = (int)z;
2148     mvsadcost[1][-i] = (int)z;
2149   } while (++i <= MV_MAX);
2150 }
2151
2152 static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
2153   int i = 1;
2154
2155   mvsadcost[0][0] = 0;
2156   mvsadcost[1][0] = 0;
2157
2158   do {
2159     double z = 256 * (2 * (log2f(8 * i) + .6));
2160     mvsadcost[0][i] = (int)z;
2161     mvsadcost[1][i] = (int)z;
2162     mvsadcost[0][-i] = (int)z;
2163     mvsadcost[1][-i] = (int)z;
2164   } while (++i <= MV_MAX);
2165 }
2166
2167 static void init_ref_frame_bufs(VP9_COMMON *cm) {
2168   int i;
2169   BufferPool *const pool = cm->buffer_pool;
2170   cm->new_fb_idx = INVALID_IDX;
2171   for (i = 0; i < REF_FRAMES; ++i) {
2172     cm->ref_frame_map[i] = INVALID_IDX;
2173   }
2174   for (i = 0; i < FRAME_BUFFERS; ++i) {
2175     pool->frame_bufs[i].ref_count = 0;
2176   }
2177 }
2178
2179 static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
2180                                  int subsampling_x, int subsampling_y) {
2181   VP9_COMMON *const cm = &cpi->common;
2182 #if !CONFIG_VP9_HIGHBITDEPTH
2183   (void)use_highbitdepth;
2184   assert(use_highbitdepth == 0);
2185 #endif
2186
2187   if (!cpi->initial_width ||
2188 #if CONFIG_VP9_HIGHBITDEPTH
2189       cm->use_highbitdepth != use_highbitdepth ||
2190 #endif
2191       cm->subsampling_x != subsampling_x ||
2192       cm->subsampling_y != subsampling_y) {
2193     cm->subsampling_x = subsampling_x;
2194     cm->subsampling_y = subsampling_y;
2195 #if CONFIG_VP9_HIGHBITDEPTH
2196     cm->use_highbitdepth = use_highbitdepth;
2197 #endif
2198     alloc_util_frame_buffers(cpi);
2199     cpi->initial_width = cm->width;
2200     cpi->initial_height = cm->height;
2201     cpi->initial_mbs = cm->MBs;
2202   }
2203 }
2204
2205 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2206 static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
2207                                               unsigned int *subsampling_x,
2208                                               unsigned int *subsampling_y) {
2209   switch (fmt) {
2210     case VPX_IMG_FMT_I420:
2211     case VPX_IMG_FMT_YV12:
2212     case VPX_IMG_FMT_I422:
2213     case VPX_IMG_FMT_I42016:
2214     case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
2215     default: *subsampling_x = 0; break;
2216   }
2217
2218   switch (fmt) {
2219     case VPX_IMG_FMT_I420:
2220     case VPX_IMG_FMT_I440:
2221     case VPX_IMG_FMT_YV12:
2222     case VPX_IMG_FMT_I42016:
2223     case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
2224     default: *subsampling_y = 0; break;
2225   }
2226 }
2227
2228 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2229 static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
2230   return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
2231 }
2232
2233 #if CONFIG_VP9_TEMPORAL_DENOISING
2234 static void setup_denoiser_buffer(VP9_COMP *cpi) {
2235   VP9_COMMON *const cm = &cpi->common;
2236   if (cpi->oxcf.noise_sensitivity > 0 &&
2237       !cpi->denoiser.frame_buffer_initialized) {
2238     if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
2239                            cpi->oxcf.noise_sensitivity, cm->width, cm->height,
2240                            cm->subsampling_x, cm->subsampling_y,
2241 #if CONFIG_VP9_HIGHBITDEPTH
2242                            cm->use_highbitdepth,
2243 #endif
2244                            VP9_ENC_BORDER_IN_PIXELS))
2245       vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
2246                          "Failed to allocate denoiser");
2247   }
2248 }
2249 #endif
2250
2251 void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
2252   const VP9EncoderConfig *oxcf = &cpi->oxcf;
2253   unsigned int subsampling_x, subsampling_y;
2254   const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
2255   vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
2256
2257   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
2258 #if CONFIG_VP9_TEMPORAL_DENOISING
2259   setup_denoiser_buffer(cpi);
2260 #endif
2261
2262   assert(cpi->lookahead == NULL);
2263   cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
2264                                       subsampling_y,
2265 #if CONFIG_VP9_HIGHBITDEPTH
2266                                       use_highbitdepth,
2267 #endif
2268                                       oxcf->lag_in_frames);
2269   alloc_raw_frame_buffers(cpi);
2270 }
2271
2272 VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
2273                                 BufferPool *const pool) {
2274   unsigned int i;
2275   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
2276   VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
2277
2278   if (!cm) return NULL;
2279
2280   vp9_zero(*cpi);
2281
2282   if (setjmp(cm->error.jmp)) {
2283     cm->error.setjmp = 0;
2284     vp9_remove_compressor(cpi);
2285     return 0;
2286   }
2287
2288   cm->error.setjmp = 1;
2289   cm->alloc_mi = vp9_enc_alloc_mi;
2290   cm->free_mi = vp9_enc_free_mi;
2291   cm->setup_mi = vp9_enc_setup_mi;
2292
2293   CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
2294   CHECK_MEM_ERROR(
2295       cm, cm->frame_contexts,
2296       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
2297
2298   cpi->use_svc = 0;
2299   cpi->resize_state = ORIG;
2300   cpi->external_resize = 0;
2301   cpi->resize_avg_qp = 0;
2302   cpi->resize_buffer_underflow = 0;
2303   cpi->use_skin_detection = 0;
2304   cpi->common.buffer_pool = pool;
2305   init_ref_frame_bufs(cm);
2306
2307   cpi->force_update_segmentation = 0;
2308
2309   init_config(cpi, oxcf);
2310   cpi->frame_info = vp9_get_frame_info(oxcf);
2311
2312   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
2313
2314   init_frame_indexes(cm);
2315   cpi->partition_search_skippable_frame = 0;
2316   cpi->tile_data = NULL;
2317
2318   realloc_segmentation_maps(cpi);
2319
2320   CHECK_MEM_ERROR(
2321       cm, cpi->skin_map,
2322       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
2323
2324 #if !CONFIG_REALTIME_ONLY
2325   CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
2326 #endif
2327
2328   CHECK_MEM_ERROR(
2329       cm, cpi->consec_zero_mv,
2330       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
2331
2332   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
2333                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
2334   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
2335                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
2336   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
2337                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
2338   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
2339                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
2340   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
2341                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
2342   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
2343                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
2344   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
2345                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
2346   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
2347                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
2348
2349   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
2350        i++) {
2351     CHECK_MEM_ERROR(
2352         cm, cpi->mbgraph_stats[i].mb_stats,
2353         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
2354   }
2355
2356 #if CONFIG_FP_MB_STATS
2357   cpi->use_fp_mb_stats = 0;
2358   if (cpi->use_fp_mb_stats) {
2359     // a place holder used to store the first pass mb stats in the first pass
2360     CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
2361                     vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
2362   } else {
2363     cpi->twopass.frame_mb_stats_buf = NULL;
2364   }
2365 #endif
2366
2367   cpi->refresh_alt_ref_frame = 0;
2368   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
2369
2370   init_level_info(&cpi->level_info);
2371   init_level_constraint(&cpi->level_constraint);
2372
2373 #if CONFIG_INTERNAL_STATS
2374   cpi->b_calculate_blockiness = 1;
2375   cpi->b_calculate_consistency = 1;
2376   cpi->total_inconsistency = 0;
2377   cpi->psnr.worst = 100.0;
2378   cpi->worst_ssim = 100.0;
2379
2380   cpi->count = 0;
2381   cpi->bytes = 0;
2382
2383   if (cpi->b_calculate_psnr) {
2384     cpi->total_sq_error = 0;
2385     cpi->total_samples = 0;
2386
2387     cpi->totalp_sq_error = 0;
2388     cpi->totalp_samples = 0;
2389
2390     cpi->tot_recode_hits = 0;
2391     cpi->summed_quality = 0;
2392     cpi->summed_weights = 0;
2393     cpi->summedp_quality = 0;
2394     cpi->summedp_weights = 0;
2395   }
2396
2397   cpi->fastssim.worst = 100.0;
2398
2399   cpi->psnrhvs.worst = 100.0;
2400
2401   if (cpi->b_calculate_blockiness) {
2402     cpi->total_blockiness = 0;
2403     cpi->worst_blockiness = 0.0;
2404   }
2405
2406   if (cpi->b_calculate_consistency) {
2407     CHECK_MEM_ERROR(cm, cpi->ssim_vars,
2408                     vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
2409                                sizeof(*cpi->ssim_vars) * 4));
2410     cpi->worst_consistency = 100.0;
2411   } else {
2412     cpi->ssim_vars = NULL;
2413   }
2414
2415 #endif
2416
2417   cpi->first_time_stamp_ever = INT64_MAX;
2418
2419   /*********************************************************************
2420    * Warning: Read the comments around 'cal_nmvjointsadcost' and       *
2421    * 'cal_nmvsadcosts' before modifying how these tables are computed. *
2422    *********************************************************************/
2423   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
2424   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
2425   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
2426   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
2427   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
2428   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
2429
2430   cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
2431   cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
2432   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
2433   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
2434   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
2435
2436 #if CONFIG_VP9_TEMPORAL_DENOISING
2437 #ifdef OUTPUT_YUV_DENOISED
2438   yuv_denoised_file = fopen("denoised.yuv", "ab");
2439 #endif
2440 #endif
2441 #ifdef OUTPUT_YUV_SKINMAP
2442   yuv_skinmap_file = fopen("skinmap.yuv", "wb");
2443 #endif
2444 #ifdef OUTPUT_YUV_REC
2445   yuv_rec_file = fopen("rec.yuv", "wb");
2446 #endif
2447 #ifdef OUTPUT_YUV_SVC_SRC
2448   yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
2449   yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
2450   yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
2451 #endif
2452
2453 #if 0
2454   framepsnr = fopen("framepsnr.stt", "a");
2455   kf_list = fopen("kf_list.stt", "w");
2456 #endif
2457
2458   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
2459
2460 #if !CONFIG_REALTIME_ONLY
2461   if (oxcf->pass == 1) {
2462     vp9_init_first_pass(cpi);
2463   } else if (oxcf->pass == 2) {
2464     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
2465     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
2466
2467     if (cpi->svc.number_spatial_layers > 1 ||
2468         cpi->svc.number_temporal_layers > 1) {
2469       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
2470       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 };
2471       int i;
2472
2473       for (i = 0; i < oxcf->ss_number_layers; ++i) {
2474         FIRSTPASS_STATS *const last_packet_for_layer =
2475             &stats[packets - oxcf->ss_number_layers + i];
2476         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
2477         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
2478         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
2479           int num_frames;
2480           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
2481
2482           vpx_free(lc->rc_twopass_stats_in.buf);
2483
2484           lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
2485           CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
2486                           vpx_malloc(lc->rc_twopass_stats_in.sz));
2487           lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
2488           lc->twopass.stats_in = lc->twopass.stats_in_start;
2489           lc->twopass.stats_in_end =
2490               lc->twopass.stats_in_start + packets_in_layer - 1;
2491           // Note the last packet is cumulative first pass stats.
2492           // So the number of frames is packet number minus one
2493           num_frames = packets_in_layer - 1;
2494           fps_init_first_pass_info(&lc->twopass.first_pass_info,
2495                                    lc->rc_twopass_stats_in.buf, num_frames);
2496           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
2497         }
2498       }
2499
2500       for (i = 0; i < packets; ++i) {
2501         const int layer_id = (int)stats[i].spatial_layer_id;
2502         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers &&
2503             stats_copy[layer_id] != NULL) {
2504           *stats_copy[layer_id] = stats[i];
2505           ++stats_copy[layer_id];
2506         }
2507       }
2508
2509       vp9_init_second_pass_spatial_svc(cpi);
2510     } else {
2511       int num_frames;
2512 #if CONFIG_FP_MB_STATS
2513       if (cpi->use_fp_mb_stats) {
2514         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
2515         const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
2516
2517         cpi->twopass.firstpass_mb_stats.mb_stats_start =
2518             oxcf->firstpass_mb_stats_in.buf;
2519         cpi->twopass.firstpass_mb_stats.mb_stats_end =
2520             cpi->twopass.firstpass_mb_stats.mb_stats_start +
2521             (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
2522       }
2523 #endif
2524
2525       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
2526       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
2527       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
2528       // Note the last packet is cumulative first pass stats.
2529       // So the number of frames is packet number minus one
2530       num_frames = packets - 1;
2531       fps_init_first_pass_info(&cpi->twopass.first_pass_info,
2532                                oxcf->two_pass_stats_in.buf, num_frames);
2533
2534       vp9_init_second_pass(cpi);
2535     }
2536   }
2537 #endif  // !CONFIG_REALTIME_ONLY
2538
2539   cpi->mb_wiener_var_cols = 0;
2540   cpi->mb_wiener_var_rows = 0;
2541   cpi->mb_wiener_variance = NULL;
2542
2543   vp9_set_speed_features_framesize_independent(cpi, oxcf->speed);
2544   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
2545
2546   {
2547     const int bsize = BLOCK_16X16;
2548     const int w = num_8x8_blocks_wide_lookup[bsize];
2549     const int h = num_8x8_blocks_high_lookup[bsize];
2550     const int num_cols = (cm->mi_cols + w - 1) / w;
2551     const int num_rows = (cm->mi_rows + h - 1) / h;
2552     CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
2553                     vpx_calloc(num_rows * num_cols,
2554                                sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
2555   }
2556
2557   cpi->kmeans_data_arr_alloc = 0;
2558 #if CONFIG_NON_GREEDY_MV
2559   cpi->tpl_ready = 0;
2560 #endif  // CONFIG_NON_GREEDY_MV
2561   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
2562
2563   // Allocate memory to store variances for a frame.
2564   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
2565   cpi->source_var_thresh = 0;
2566   cpi->frames_till_next_var_check = 0;
2567 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
2568   cpi->fn_ptr[BT].sdf = SDF;                             \
2569   cpi->fn_ptr[BT].sdaf = SDAF;                           \
2570   cpi->fn_ptr[BT].vf = VF;                               \
2571   cpi->fn_ptr[BT].svf = SVF;                             \
2572   cpi->fn_ptr[BT].svaf = SVAF;                           \
2573   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
2574   cpi->fn_ptr[BT].sdx8f = SDX8F;
2575
2576   // TODO(angiebird): make sdx8f available for every block size
2577   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
2578       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
2579       vpx_sad32x16x4d, NULL)
2580
2581   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
2582       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
2583       vpx_sad16x32x4d, NULL)
2584
2585   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
2586       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
2587       vpx_sad64x32x4d, NULL)
2588
2589   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
2590       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
2591       vpx_sad32x64x4d, NULL)
2592
2593   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
2594       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
2595       vpx_sad32x32x4d, vpx_sad32x32x8)
2596
2597   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
2598       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
2599       vpx_sad64x64x4d, NULL)
2600
2601   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
2602       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
2603       vpx_sad16x16x4d, vpx_sad16x16x8)
2604
2605   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
2606       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
2607       vpx_sad16x8x4d, vpx_sad16x8x8)
2608
2609   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
2610       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
2611       vpx_sad8x16x4d, vpx_sad8x16x8)
2612
2613   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
2614       vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
2615       vpx_sad8x8x8)
2616
2617   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
2618       vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
2619       NULL)
2620
2621   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
2622       vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
2623       NULL)
2624
2625   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
2626       vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
2627       vpx_sad4x4x8)
2628
2629 #if CONFIG_VP9_HIGHBITDEPTH
2630   highbd_set_var_fns(cpi);
2631 #endif
2632
2633   /* vp9_init_quantizer() is first called here. Add check in
2634    * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
2635    * called later when needed. This will avoid unnecessary calls of
2636    * vp9_init_quantizer() for every frame.
2637    */
2638   vp9_init_quantizer(cpi);
2639
2640   vp9_loop_filter_init(cm);
2641
2642   // Set up the unit scaling factor used during motion search.
2643 #if CONFIG_VP9_HIGHBITDEPTH
2644   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2645                                     cm->width, cm->height,
2646                                     cm->use_highbitdepth);
2647 #else
2648   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2649                                     cm->width, cm->height);
2650 #endif  // CONFIG_VP9_HIGHBITDEPTH
2651   cpi->td.mb.me_sf = &cpi->me_sf;
2652
2653   cm->error.setjmp = 0;
2654
2655 #if CONFIG_RATE_CTRL
2656   encode_command_init(&cpi->encode_command);
2657   partition_info_init(cpi);
2658   motion_vector_info_init(cpi);
2659 #endif
2660
2661   return cpi;
2662 }
2663
2664 #if CONFIG_INTERNAL_STATS
2665 #define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
2666
2667 #define SNPRINT2(H, T, V) \
2668   snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
2669 #endif  // CONFIG_INTERNAL_STATS
2670
2671 static void free_tpl_buffer(VP9_COMP *cpi);
2672
2673 void vp9_remove_compressor(VP9_COMP *cpi) {
2674   VP9_COMMON *cm;
2675   unsigned int i;
2676   int t;
2677
2678   if (!cpi) return;
2679
2680 #if CONFIG_INTERNAL_STATS
2681   vpx_free(cpi->ssim_vars);
2682 #endif
2683
2684   cm = &cpi->common;
2685   if (cm->current_video_frame > 0) {
2686 #if CONFIG_INTERNAL_STATS
2687     vpx_clear_system_state();
2688
2689     if (cpi->oxcf.pass != 1) {
2690       char headings[512] = { 0 };
2691       char results[512] = { 0 };
2692       FILE *f = fopen("opsnr.stt", "a");
2693       double time_encoded =
2694           (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
2695           10000000.000;
2696       double total_encode_time =
2697           (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
2698       const double dr =
2699           (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
2700       const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
2701       const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
2702       const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
2703
2704       if (cpi->b_calculate_psnr) {
2705         const double total_psnr = vpx_sse_to_psnr(
2706             (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
2707         const double totalp_psnr = vpx_sse_to_psnr(
2708             (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error);
2709         const double total_ssim =
2710             100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
2711         const double totalp_ssim =
2712             100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0);
2713
2714         snprintf(headings, sizeof(headings),
2715                  "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
2716                  "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
2717                  "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
2718                  "AVPsnrY\tAPsnrCb\tAPsnrCr");
2719         snprintf(results, sizeof(results),
2720                  "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2721                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2722                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2723                  "%7.3f\t%7.3f\t%7.3f",
2724                  dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
2725                  cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim,
2726                  totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count,
2727                  cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
2728                  cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
2729                  cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count,
2730                  cpi->psnr.stat[V] / cpi->count);
2731
2732         if (cpi->b_calculate_blockiness) {
2733           SNPRINT(headings, "\t  Block\tWstBlck");
2734           SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
2735           SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
2736         }
2737
2738         if (cpi->b_calculate_consistency) {
2739           double consistency =
2740               vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
2741                               (double)cpi->total_inconsistency);
2742
2743           SNPRINT(headings, "\tConsist\tWstCons");
2744           SNPRINT2(results, "\t%7.3f", consistency);
2745           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
2746         }
2747
2748         SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
2749         SNPRINT2(results, "\t%8.0f", total_encode_time);
2750         SNPRINT2(results, "\t%7.2f", rate_err);
2751         SNPRINT2(results, "\t%7.2f", fabs(rate_err));
2752
2753         fprintf(f, "%s\tAPsnr611\n", headings);
2754         fprintf(
2755             f, "%s\t%7.3f\n", results,
2756             (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
2757                 (cpi->count * 8));
2758       }
2759
2760       fclose(f);
2761     }
2762 #endif
2763
2764 #if 0
2765     {
2766       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
2767       printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
2768       printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
2769              cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
2770              cpi->time_compress_data / 1000,
2771              (cpi->time_receive_data + cpi->time_compress_data) / 1000);
2772     }
2773 #endif
2774   }
2775
2776 #if CONFIG_VP9_TEMPORAL_DENOISING
2777   vp9_denoiser_free(&(cpi->denoiser));
2778 #endif
2779
2780   if (cpi->kmeans_data_arr_alloc) {
2781 #if CONFIG_MULTITHREAD
2782     pthread_mutex_destroy(&cpi->kmeans_mutex);
2783 #endif
2784     vpx_free(cpi->kmeans_data_arr);
2785   }
2786
2787   free_tpl_buffer(cpi);
2788
2789   for (t = 0; t < cpi->num_workers; ++t) {
2790     VPxWorker *const worker = &cpi->workers[t];
2791     EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
2792
2793     // Deallocate allocated threads.
2794     vpx_get_worker_interface()->end(worker);
2795
2796     // Deallocate allocated thread data.
2797     if (t < cpi->num_workers - 1) {
2798       vpx_free(thread_data->td->counts);
2799       vp9_free_pc_tree(thread_data->td);
2800       vpx_free(thread_data->td);
2801     }
2802   }
2803   vpx_free(cpi->tile_thr_data);
2804   vpx_free(cpi->workers);
2805   vp9_row_mt_mem_dealloc(cpi);
2806
2807   if (cpi->num_workers > 1) {
2808     vp9_loop_filter_dealloc(&cpi->lf_row_sync);
2809     vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
2810   }
2811
2812 #if !CONFIG_REALTIME_ONLY
2813   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
2814 #endif
2815
2816   dealloc_compressor_data(cpi);
2817
2818   for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
2819        ++i) {
2820     vpx_free(cpi->mbgraph_stats[i].mb_stats);
2821   }
2822
2823 #if CONFIG_FP_MB_STATS
2824   if (cpi->use_fp_mb_stats) {
2825     vpx_free(cpi->twopass.frame_mb_stats_buf);
2826     cpi->twopass.frame_mb_stats_buf = NULL;
2827   }
2828 #endif
2829
2830   vp9_remove_common(cm);
2831   vp9_free_ref_frame_buffers(cm->buffer_pool);
2832 #if CONFIG_VP9_POSTPROC
2833   vp9_free_postproc_buffers(cm);
2834 #endif
2835   vpx_free(cpi);
2836
2837 #if CONFIG_VP9_TEMPORAL_DENOISING
2838 #ifdef OUTPUT_YUV_DENOISED
2839   fclose(yuv_denoised_file);
2840 #endif
2841 #endif
2842 #ifdef OUTPUT_YUV_SKINMAP
2843   fclose(yuv_skinmap_file);
2844 #endif
2845 #ifdef OUTPUT_YUV_REC
2846   fclose(yuv_rec_file);
2847 #endif
2848 #ifdef OUTPUT_YUV_SVC_SRC
2849   fclose(yuv_svc_src[0]);
2850   fclose(yuv_svc_src[1]);
2851   fclose(yuv_svc_src[2]);
2852 #endif
2853
2854 #if 0
2855
2856   if (keyfile)
2857     fclose(keyfile);
2858
2859   if (framepsnr)
2860     fclose(framepsnr);
2861
2862   if (kf_list)
2863     fclose(kf_list);
2864
2865 #endif
2866 }
2867
2868 int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
2869   if (is_psnr_calc_enabled(cpi)) {
2870 #if CONFIG_VP9_HIGHBITDEPTH
2871     vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
2872                          cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
2873 #else
2874     vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
2875 #endif
2876     return 1;
2877   } else {
2878     vp9_zero(*psnr);
2879     return 0;
2880   }
2881 }
2882
2883 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
2884   if (ref_frame_flags > 7) return -1;
2885
2886   cpi->ref_frame_flags = ref_frame_flags;
2887   return 0;
2888 }
2889
2890 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
2891   cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
2892   cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
2893   cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
2894   cpi->ext_refresh_frame_flags_pending = 1;
2895 }
2896
2897 static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
2898     VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
2899   MV_REFERENCE_FRAME ref_frame = NONE;
2900   if (ref_frame_flag == VP9_LAST_FLAG)
2901     ref_frame = LAST_FRAME;
2902   else if (ref_frame_flag == VP9_GOLD_FLAG)
2903     ref_frame = GOLDEN_FRAME;
2904   else if (ref_frame_flag == VP9_ALT_FLAG)
2905     ref_frame = ALTREF_FRAME;
2906
2907   return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
2908 }
2909
2910 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2911                            YV12_BUFFER_CONFIG *sd) {
2912   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2913   if (cfg) {
2914     vpx_yv12_copy_frame(cfg, sd);
2915     return 0;
2916   } else {
2917     return -1;
2918   }
2919 }
2920
2921 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2922                           YV12_BUFFER_CONFIG *sd) {
2923   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2924   if (cfg) {
2925     vpx_yv12_copy_frame(sd, cfg);
2926     return 0;
2927   } else {
2928     return -1;
2929   }
2930 }
2931
2932 int vp9_update_entropy(VP9_COMP *cpi, int update) {
2933   cpi->ext_refresh_frame_context = update;
2934   cpi->ext_refresh_frame_context_pending = 1;
2935   return 0;
2936 }
2937
2938 #ifdef OUTPUT_YUV_REC
2939 void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
2940   YV12_BUFFER_CONFIG *s = cm->frame_to_show;
2941   uint8_t *src = s->y_buffer;
2942   int h = cm->height;
2943
2944 #if CONFIG_VP9_HIGHBITDEPTH
2945   if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
2946     uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
2947
2948     do {
2949       fwrite(src16, s->y_width, 2, yuv_rec_file);
2950       src16 += s->y_stride;
2951     } while (--h);
2952
2953     src16 = CONVERT_TO_SHORTPTR(s->u_buffer);
2954     h = s->uv_height;
2955
2956     do {
2957       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2958       src16 += s->uv_stride;
2959     } while (--h);
2960
2961     src16 = CONVERT_TO_SHORTPTR(s->v_buffer);
2962     h = s->uv_height;
2963
2964     do {
2965       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2966       src16 += s->uv_stride;
2967     } while (--h);
2968
2969     fflush(yuv_rec_file);
2970     return;
2971   }
2972 #endif  // CONFIG_VP9_HIGHBITDEPTH
2973
2974   do {
2975     fwrite(src, s->y_width, 1, yuv_rec_file);
2976     src += s->y_stride;
2977   } while (--h);
2978
2979   src = s->u_buffer;
2980   h = s->uv_height;
2981
2982   do {
2983     fwrite(src, s->uv_width, 1, yuv_rec_file);
2984     src += s->uv_stride;
2985   } while (--h);
2986
2987   src = s->v_buffer;
2988   h = s->uv_height;
2989
2990   do {
2991     fwrite(src, s->uv_width, 1, yuv_rec_file);
2992     src += s->uv_stride;
2993   } while (--h);
2994
2995   fflush(yuv_rec_file);
2996 }
2997 #endif
2998
2999 #if CONFIG_VP9_HIGHBITDEPTH
3000 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3001                                                 YV12_BUFFER_CONFIG *dst,
3002                                                 int bd) {
3003 #else
3004 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3005                                                 YV12_BUFFER_CONFIG *dst) {
3006 #endif  // CONFIG_VP9_HIGHBITDEPTH
3007   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
3008   int i;
3009   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
3010                                    src->v_buffer };
3011   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
3012   const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
3013                               src->uv_crop_width };
3014   const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
3015                                src->uv_crop_height };
3016   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
3017   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
3018   const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
3019                               dst->uv_crop_width };
3020   const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
3021                                dst->uv_crop_height };
3022
3023   for (i = 0; i < MAX_MB_PLANE; ++i) {
3024 #if CONFIG_VP9_HIGHBITDEPTH
3025     if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
3026       vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
3027                               src_strides[i], dsts[i], dst_heights[i],
3028                               dst_widths[i], dst_strides[i], bd);
3029     } else {
3030       vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3031                        dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3032     }
3033 #else
3034     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3035                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3036 #endif  // CONFIG_VP9_HIGHBITDEPTH
3037   }
3038   vpx_extend_frame_borders(dst);
3039 }
3040
3041 #if CONFIG_VP9_HIGHBITDEPTH
3042 static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
3043                                    YV12_BUFFER_CONFIG *dst, int bd,
3044                                    INTERP_FILTER filter_type,
3045                                    int phase_scaler) {
3046   const int src_w = src->y_crop_width;
3047   const int src_h = src->y_crop_height;
3048   const int dst_w = dst->y_crop_width;
3049   const int dst_h = dst->y_crop_height;
3050   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
3051                                    src->v_buffer };
3052   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
3053   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
3054   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
3055   const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
3056   int x, y, i;
3057
3058   for (i = 0; i < MAX_MB_PLANE; ++i) {
3059     const int factor = (i == 0 || i == 3 ? 1 : 2);
3060     const int src_stride = src_strides[i];
3061     const int dst_stride = dst_strides[i];
3062     for (y = 0; y < dst_h; y += 16) {
3063       const int y_q4 = y * (16 / factor) * src_h / dst_h + phase_scaler;
3064       for (x = 0; x < dst_w; x += 16) {
3065         const int x_q4 = x * (16 / factor) * src_w / dst_w + phase_scaler;
3066         const uint8_t *src_ptr = srcs[i] +
3067                                  (y / factor) * src_h / dst_h * src_stride +
3068                                  (x / factor) * src_w / dst_w;
3069         uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
3070
3071         if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
3072           vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
3073                                CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, kernel,
3074                                x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
3075                                16 * src_h / dst_h, 16 / factor, 16 / factor,
3076                                bd);
3077         } else {
3078           vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
3079                         x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
3080                         16 * src_h / dst_h, 16 / factor, 16 / factor);
3081         }
3082       }
3083     }
3084   }
3085
3086   vpx_extend_frame_borders(dst);
3087 }
3088 #endif  // CONFIG_VP9_HIGHBITDEPTH
3089
3090 #if !CONFIG_REALTIME_ONLY
3091 static int scale_down(VP9_COMP *cpi, int q) {
3092   RATE_CONTROL *const rc = &cpi->rc;
3093   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3094   int scale = 0;
3095   assert(frame_is_kf_gf_arf(cpi));
3096
3097   if (rc->frame_size_selector == UNSCALED &&
3098       q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
3099     const int max_size_thresh =
3100         (int)(rate_thresh_mult[SCALE_STEP1] *
3101               VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
3102     scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
3103   }
3104   return scale;
3105 }
3106
3107 static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
3108   const RATE_CONTROL *const rc = &cpi->rc;
3109   int big_miss_high;
3110
3111   if (frame_is_kf_gf_arf(cpi))
3112     big_miss_high = rc->this_frame_target * 3 / 2;
3113   else
3114     big_miss_high = rc->this_frame_target * 2;
3115
3116   return big_miss_high;
3117 }
3118
3119 static int big_rate_miss(VP9_COMP *cpi) {
3120   const RATE_CONTROL *const rc = &cpi->rc;
3121   int big_miss_high;
3122   int big_miss_low;
3123
3124   // Ignore for overlay frames
3125   if (rc->is_src_frame_alt_ref) {
3126     return 0;
3127   } else {
3128     big_miss_low = (rc->this_frame_target / 2);
3129     big_miss_high = big_rate_miss_high_threshold(cpi);
3130
3131     return (rc->projected_frame_size > big_miss_high) ||
3132            (rc->projected_frame_size < big_miss_low);
3133   }
3134 }
3135
3136 // test in two pass for the first
3137 static int two_pass_first_group_inter(VP9_COMP *cpi) {
3138   if (cpi->oxcf.pass == 2) {
3139     TWO_PASS *const twopass = &cpi->twopass;
3140     GF_GROUP *const gf_group = &twopass->gf_group;
3141     const int gfg_index = gf_group->index;
3142
3143     if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
3144     return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
3145            gf_group->update_type[gfg_index] == LF_UPDATE;
3146   } else {
3147     return 0;
3148   }
3149 }
3150
3151 // Function to test for conditions that indicate we should loop
3152 // back and recode a frame.
3153 static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
3154                             int maxq, int minq) {
3155   const RATE_CONTROL *const rc = &cpi->rc;
3156   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
3157   const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
3158   int force_recode = 0;
3159
3160   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3161       big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) ||
3162       (two_pass_first_group_inter(cpi) &&
3163        (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) ||
3164       (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) {
3165     if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
3166         scale_down(cpi, q)) {
3167       // Code this group at a lower resolution.
3168       cpi->resize_pending = 1;
3169       return 1;
3170     }
3171
3172     // Force recode for extreme overshoot.
3173     if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3174         (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
3175          rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) {
3176       return 1;
3177     }
3178
3179     // TODO(agrange) high_limit could be greater than the scale-down threshold.
3180     if ((rc->projected_frame_size > high_limit && q < maxq) ||
3181         (rc->projected_frame_size < low_limit && q > minq)) {
3182       force_recode = 1;
3183     } else if (cpi->oxcf.rc_mode == VPX_CQ) {
3184       // Deal with frame undershoot and whether or not we are
3185       // below the automatically set cq level.
3186       if (q > oxcf->cq_level &&
3187           rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
3188         force_recode = 1;
3189       }
3190     }
3191   }
3192   return force_recode;
3193 }
3194 #endif  // !CONFIG_REALTIME_ONLY
3195
3196 static void update_ref_frames(VP9_COMP *cpi) {
3197   VP9_COMMON *const cm = &cpi->common;
3198   BufferPool *const pool = cm->buffer_pool;
3199   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3200
3201   if (cpi->rc.show_arf_as_gld) {
3202     int tmp = cpi->alt_fb_idx;
3203     cpi->alt_fb_idx = cpi->gld_fb_idx;
3204     cpi->gld_fb_idx = tmp;
3205   } else if (cm->show_existing_frame) {
3206     // Pop ARF.
3207     cpi->lst_fb_idx = cpi->alt_fb_idx;
3208     cpi->alt_fb_idx =
3209         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3210     --gf_group->stack_size;
3211   }
3212
3213   // At this point the new frame has been encoded.
3214   // If any buffer copy / swapping is signaled it should be done here.
3215   if (cm->frame_type == KEY_FRAME) {
3216     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3217                cm->new_fb_idx);
3218     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3219                cm->new_fb_idx);
3220   } else if (vp9_preserve_existing_gf(cpi)) {
3221     // We have decided to preserve the previously existing golden frame as our
3222     // new ARF frame. However, in the short term in function
3223     // vp9_get_refresh_mask() we left it in the GF slot and, if
3224     // we're updating the GF with the current decoded frame, we save it to the
3225     // ARF slot instead.
3226     // We now have to update the ARF with the current frame and swap gld_fb_idx
3227     // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
3228     // slot and, if we're updating the GF, the current frame becomes the new GF.
3229     int tmp;
3230
3231     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3232                cm->new_fb_idx);
3233
3234     tmp = cpi->alt_fb_idx;
3235     cpi->alt_fb_idx = cpi->gld_fb_idx;
3236     cpi->gld_fb_idx = tmp;
3237   } else { /* For non key/golden frames */
3238     if (cpi->refresh_alt_ref_frame) {
3239       int arf_idx = gf_group->top_arf_idx;
3240
3241       // Push new ARF into stack.
3242       stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
3243                  gf_group->stack_size);
3244       ++gf_group->stack_size;
3245
3246       assert(arf_idx < REF_FRAMES);
3247
3248       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
3249       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
3250              cpi->interp_filter_selected[0],
3251              sizeof(cpi->interp_filter_selected[0]));
3252
3253       cpi->alt_fb_idx = arf_idx;
3254     }
3255
3256     if (cpi->refresh_golden_frame) {
3257       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3258                  cm->new_fb_idx);
3259       if (!cpi->rc.is_src_frame_alt_ref)
3260         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3261                cpi->interp_filter_selected[0],
3262                sizeof(cpi->interp_filter_selected[0]));
3263       else
3264         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3265                cpi->interp_filter_selected[ALTREF_FRAME],
3266                sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
3267     }
3268   }
3269
3270   if (cpi->refresh_last_frame) {
3271     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
3272                cm->new_fb_idx);
3273     if (!cpi->rc.is_src_frame_alt_ref)
3274       memcpy(cpi->interp_filter_selected[LAST_FRAME],
3275              cpi->interp_filter_selected[0],
3276              sizeof(cpi->interp_filter_selected[0]));
3277   }
3278
3279   if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
3280     cpi->alt_fb_idx =
3281         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3282     --gf_group->stack_size;
3283   }
3284 }
3285
3286 void vp9_update_reference_frames(VP9_COMP *cpi) {
3287   update_ref_frames(cpi);
3288
3289 #if CONFIG_VP9_TEMPORAL_DENOISING
3290   vp9_denoiser_update_ref_frame(cpi);
3291 #endif
3292
3293   if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
3294 }
3295
3296 static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
3297   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
3298   struct loopfilter *lf = &cm->lf;
3299   int is_reference_frame =
3300       (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
3301        cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
3302   if (cpi->use_svc &&
3303       cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
3304     is_reference_frame = !cpi->svc.non_reference_frame;
3305
3306   // Skip loop filter in show_existing_frame mode.
3307   if (cm->show_existing_frame) {
3308     lf->filter_level = 0;
3309     return;
3310   }
3311
3312   if (xd->lossless) {
3313     lf->filter_level = 0;
3314     lf->last_filt_level = 0;
3315   } else {
3316     struct vpx_usec_timer timer;
3317
3318     vpx_clear_system_state();
3319
3320     vpx_usec_timer_start(&timer);
3321
3322     if (!cpi->rc.is_src_frame_alt_ref) {
3323       if ((cpi->common.frame_type == KEY_FRAME) &&
3324           (!cpi->rc.this_key_frame_forced)) {
3325         lf->last_filt_level = 0;
3326       }
3327       vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
3328       lf->last_filt_level = lf->filter_level;
3329     } else {
3330       lf->filter_level = 0;
3331     }
3332
3333     vpx_usec_timer_mark(&timer);
3334     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
3335   }
3336
3337   if (lf->filter_level > 0 && is_reference_frame) {
3338     vp9_build_mask_frame(cm, lf->filter_level, 0);
3339
3340     if (cpi->num_workers > 1)
3341       vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
3342                                lf->filter_level, 0, 0, cpi->workers,
3343                                cpi->num_workers, &cpi->lf_row_sync);
3344     else
3345       vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
3346   }
3347
3348   vpx_extend_frame_inner_borders(cm->frame_to_show);
3349 }
3350
3351 static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
3352   RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
3353   if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
3354       new_fb_ptr->mi_cols < cm->mi_cols) {
3355     vpx_free(new_fb_ptr->mvs);
3356     CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
3357                     (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
3358                                          sizeof(*new_fb_ptr->mvs)));
3359     new_fb_ptr->mi_rows = cm->mi_rows;
3360     new_fb_ptr->mi_cols = cm->mi_cols;
3361   }
3362 }
3363
3364 void vp9_scale_references(VP9_COMP *cpi) {
3365   VP9_COMMON *cm = &cpi->common;
3366   MV_REFERENCE_FRAME ref_frame;
3367   const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG,
3368                                      VP9_ALT_FLAG };
3369
3370   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3371     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
3372     if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
3373       BufferPool *const pool = cm->buffer_pool;
3374       const YV12_BUFFER_CONFIG *const ref =
3375           get_ref_frame_buffer(cpi, ref_frame);
3376
3377       if (ref == NULL) {
3378         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3379         continue;
3380       }
3381
3382 #if CONFIG_VP9_HIGHBITDEPTH
3383       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3384         RefCntBuffer *new_fb_ptr = NULL;
3385         int force_scaling = 0;
3386         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3387         if (new_fb == INVALID_IDX) {
3388           new_fb = get_free_fb(cm);
3389           force_scaling = 1;
3390         }
3391         if (new_fb == INVALID_IDX) return;
3392         new_fb_ptr = &pool->frame_bufs[new_fb];
3393         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3394             new_fb_ptr->buf.y_crop_height != cm->height) {
3395           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3396                                        cm->subsampling_x, cm->subsampling_y,
3397                                        cm->use_highbitdepth,
3398                                        VP9_ENC_BORDER_IN_PIXELS,
3399                                        cm->byte_alignment, NULL, NULL, NULL))
3400             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3401                                "Failed to allocate frame buffer");
3402           scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth,
3403                                  EIGHTTAP, 0);
3404           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3405           alloc_frame_mvs(cm, new_fb);
3406         }
3407 #else
3408       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3409         RefCntBuffer *new_fb_ptr = NULL;
3410         int force_scaling = 0;
3411         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3412         if (new_fb == INVALID_IDX) {
3413           new_fb = get_free_fb(cm);
3414           force_scaling = 1;
3415         }
3416         if (new_fb == INVALID_IDX) return;
3417         new_fb_ptr = &pool->frame_bufs[new_fb];
3418         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3419             new_fb_ptr->buf.y_crop_height != cm->height) {
3420           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3421                                        cm->subsampling_x, cm->subsampling_y,
3422                                        VP9_ENC_BORDER_IN_PIXELS,
3423                                        cm->byte_alignment, NULL, NULL, NULL))
3424             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3425                                "Failed to allocate frame buffer");
3426           vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
3427           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3428           alloc_frame_mvs(cm, new_fb);
3429         }
3430 #endif  // CONFIG_VP9_HIGHBITDEPTH
3431       } else {
3432         int buf_idx;
3433         RefCntBuffer *buf = NULL;
3434         if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3435           // Check for release of scaled reference.
3436           buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
3437           if (buf_idx != INVALID_IDX) {
3438             buf = &pool->frame_bufs[buf_idx];
3439             --buf->ref_count;
3440             cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3441           }
3442         }
3443         buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3444         buf = &pool->frame_bufs[buf_idx];
3445         buf->buf.y_crop_width = ref->y_crop_width;
3446         buf->buf.y_crop_height = ref->y_crop_height;
3447         cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
3448         ++buf->ref_count;
3449       }
3450     } else {
3451       if (cpi->oxcf.pass != 0 || cpi->use_svc)
3452         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3453     }
3454   }
3455 }
3456
3457 static void release_scaled_references(VP9_COMP *cpi) {
3458   VP9_COMMON *cm = &cpi->common;
3459   int i;
3460   if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3461     // Only release scaled references under certain conditions:
3462     // if reference will be updated, or if scaled reference has same resolution.
3463     int refresh[3];
3464     refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
3465     refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
3466     refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
3467     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3468       const int idx = cpi->scaled_ref_idx[i - 1];
3469       if (idx != INVALID_IDX) {
3470         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3471         const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
3472         if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
3473                                buf->buf.y_crop_height == ref->y_crop_height)) {
3474           --buf->ref_count;
3475           cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
3476         }
3477       }
3478     }
3479   } else {
3480     for (i = 0; i < REFS_PER_FRAME; ++i) {
3481       const int idx = cpi->scaled_ref_idx[i];
3482       if (idx != INVALID_IDX) {
3483         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3484         --buf->ref_count;
3485         cpi->scaled_ref_idx[i] = INVALID_IDX;
3486       }
3487     }
3488   }
3489 }
3490
3491 static void full_to_model_count(unsigned int *model_count,
3492                                 unsigned int *full_count) {
3493   int n;
3494   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
3495   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
3496   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
3497   for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
3498     model_count[TWO_TOKEN] += full_count[n];
3499   model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
3500 }
3501
3502 static void full_to_model_counts(vp9_coeff_count_model *model_count,
3503                                  vp9_coeff_count *full_count) {
3504   int i, j, k, l;
3505
3506   for (i = 0; i < PLANE_TYPES; ++i)
3507     for (j = 0; j < REF_TYPES; ++j)
3508       for (k = 0; k < COEF_BANDS; ++k)
3509         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
3510           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
3511 }
3512
3513 #if 0 && CONFIG_INTERNAL_STATS
3514 static void output_frame_level_debug_stats(VP9_COMP *cpi) {
3515   VP9_COMMON *const cm = &cpi->common;
3516   FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
3517   int64_t recon_err;
3518
3519   vpx_clear_system_state();
3520
3521 #if CONFIG_VP9_HIGHBITDEPTH
3522   if (cm->use_highbitdepth) {
3523     recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3524   } else {
3525     recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3526   }
3527 #else
3528   recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3529 #endif  // CONFIG_VP9_HIGHBITDEPTH
3530
3531
3532   if (cpi->twopass.total_left_stats.coded_error != 0.0) {
3533     double dc_quant_devisor;
3534 #if CONFIG_VP9_HIGHBITDEPTH
3535     switch (cm->bit_depth) {
3536       case VPX_BITS_8:
3537         dc_quant_devisor = 4.0;
3538         break;
3539       case VPX_BITS_10:
3540         dc_quant_devisor = 16.0;
3541         break;
3542       default:
3543         assert(cm->bit_depth == VPX_BITS_12);
3544         dc_quant_devisor = 64.0;
3545         break;
3546     }
3547 #else
3548     dc_quant_devisor = 4.0;
3549 #endif
3550
3551     if (!cm->current_video_frame) {
3552       fprintf(f, "frame, width, height, last ts, last end ts, "
3553           "source_alt_ref_pending, source_alt_ref_active, "
3554           "this_frame_target, projected_frame_size, "
3555           "projected_frame_size / MBs, "
3556           "projected_frame_size - this_frame_target, "
3557           "vbr_bits_off_target, vbr_bits_off_target_fast, "
3558           "twopass.extend_minq, twopass.extend_minq_fast, "
3559           "total_target_vs_actual, "
3560           "starting_buffer_level - bits_off_target, "
3561           "total_actual_bits, base_qindex, q for base_qindex, "
3562           "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
3563           "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
3564           "frame_type, gfu_boost, "
3565           "twopass.bits_left, "
3566           "twopass.total_left_stats.coded_error, "
3567           "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
3568           "tot_recode_hits, recon_err, kf_boost, "
3569           "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
3570           "filter_level, seg.aq_av_offset\n");
3571     }
3572
3573     fprintf(f, "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
3574         "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
3575         "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
3576         "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
3577         "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
3578         cpi->common.current_video_frame,
3579         cm->width, cm->height,
3580         cpi->last_time_stamp_seen,
3581         cpi->last_end_time_stamp_seen,
3582         cpi->rc.source_alt_ref_pending,
3583         cpi->rc.source_alt_ref_active,
3584         cpi->rc.this_frame_target,
3585         cpi->rc.projected_frame_size,
3586         cpi->rc.projected_frame_size / cpi->common.MBs,
3587         (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
3588         cpi->rc.vbr_bits_off_target,
3589         cpi->rc.vbr_bits_off_target_fast,
3590         cpi->twopass.extend_minq,
3591         cpi->twopass.extend_minq_fast,
3592         cpi->rc.total_target_vs_actual,
3593         (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
3594         cpi->rc.total_actual_bits, cm->base_qindex,
3595         vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
3596         (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
3597             dc_quant_devisor,
3598         vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
3599                                 cm->bit_depth),
3600         cpi->rc.avg_q,
3601         vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
3602         cpi->refresh_last_frame, cpi->refresh_golden_frame,
3603         cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
3604         cpi->twopass.bits_left,
3605         cpi->twopass.total_left_stats.coded_error,
3606         cpi->twopass.bits_left /
3607             (1 + cpi->twopass.total_left_stats.coded_error),
3608         cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
3609         cpi->twopass.kf_zeromotion_pct,
3610         cpi->twopass.fr_content_type,
3611         cm->lf.filter_level,
3612         cm->seg.aq_av_offset);
3613   }
3614   fclose(f);
3615
3616   if (0) {
3617     FILE *const fmodes = fopen("Modes.stt", "a");
3618     int i;
3619
3620     fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
3621             cm->frame_type, cpi->refresh_golden_frame,
3622             cpi->refresh_alt_ref_frame);
3623
3624     for (i = 0; i < MAX_MODES; ++i)
3625       fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
3626
3627     fprintf(fmodes, "\n");
3628
3629     fclose(fmodes);
3630   }
3631 }
3632 #endif
3633
3634 static void set_mv_search_params(VP9_COMP *cpi) {
3635   const VP9_COMMON *const cm = &cpi->common;
3636   const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
3637
3638   // Default based on max resolution.
3639   cpi->mv_step_param = vp9_init_search_range(max_mv_def);
3640
3641   if (cpi->sf.mv.auto_mv_step_size) {
3642     if (frame_is_intra_only(cm)) {
3643       // Initialize max_mv_magnitude for use in the first INTER frame
3644       // after a key/intra-only frame.
3645       cpi->max_mv_magnitude = max_mv_def;
3646     } else {
3647       if (cm->show_frame) {
3648         // Allow mv_steps to correspond to twice the max mv magnitude found
3649         // in the previous frame, capped by the default max_mv_magnitude based
3650         // on resolution.
3651         cpi->mv_step_param = vp9_init_search_range(
3652             VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
3653       }
3654       cpi->max_mv_magnitude = 0;
3655     }
3656   }
3657 }
3658
3659 static void set_size_independent_vars(VP9_COMP *cpi) {
3660   vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
3661   vp9_set_rd_speed_thresholds(cpi);
3662   vp9_set_rd_speed_thresholds_sub8x8(cpi);
3663   cpi->common.interp_filter = cpi->sf.default_interp_filter;
3664 }
3665
3666 static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
3667                                     int *top_index) {
3668   VP9_COMMON *const cm = &cpi->common;
3669
3670   // Setup variables that depend on the dimensions of the frame.
3671   vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
3672
3673   // Decide q and q bounds.
3674   *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
3675
3676   if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
3677     *q = cpi->rc.worst_quality;
3678     cpi->rc.force_max_q = 0;
3679   }
3680
3681   if (!frame_is_intra_only(cm)) {
3682     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
3683   }
3684
3685 #if !CONFIG_REALTIME_ONLY
3686   // Configure experimental use of segmentation for enhanced coding of
3687   // static regions if indicated.
3688   // Only allowed in the second pass of a two pass encode, as it requires
3689   // lagged coding, and if the relevant speed feature flag is set.
3690   if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
3691     configure_static_seg_features(cpi);
3692 #endif  // !CONFIG_REALTIME_ONLY
3693
3694 #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
3695   if (cpi->oxcf.noise_sensitivity > 0) {
3696     int l = 0;
3697     switch (cpi->oxcf.noise_sensitivity) {
3698       case 1: l = 20; break;
3699       case 2: l = 40; break;
3700       case 3: l = 60; break;
3701       case 4:
3702       case 5: l = 100; break;
3703       case 6: l = 150; break;
3704     }
3705     if (!cpi->common.postproc_state.limits) {
3706       cpi->common.postproc_state.limits =
3707           vpx_calloc(cpi->un_scaled_source->y_width,
3708                      sizeof(*cpi->common.postproc_state.limits));
3709     }
3710     vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
3711                 cpi->common.postproc_state.limits);
3712   }
3713 #endif  // CONFIG_VP9_POSTPROC
3714 }
3715
3716 static void init_motion_estimation(VP9_COMP *cpi) {
3717   int y_stride = cpi->scaled_source.y_stride;
3718
3719   if (cpi->sf.mv.search_method == NSTEP) {
3720     vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
3721   } else if (cpi->sf.mv.search_method == DIAMOND) {
3722     vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
3723   }
3724 }
3725
3726 static void set_frame_size(VP9_COMP *cpi) {
3727   int ref_frame;
3728   VP9_COMMON *const cm = &cpi->common;
3729   VP9EncoderConfig *const oxcf = &cpi->oxcf;
3730   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
3731
3732 #if !CONFIG_REALTIME_ONLY
3733   if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
3734       ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
3735        (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
3736     calculate_coded_size(cpi, &oxcf->scaled_frame_width,
3737                          &oxcf->scaled_frame_height);
3738
3739     // There has been a change in frame size.
3740     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3741                          oxcf->scaled_frame_height);
3742   }
3743 #endif  // !CONFIG_REALTIME_ONLY
3744
3745   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && !cpi->use_svc &&
3746       oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
3747     oxcf->scaled_frame_width =
3748         (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
3749     oxcf->scaled_frame_height =
3750         (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den;
3751     // There has been a change in frame size.
3752     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3753                          oxcf->scaled_frame_height);
3754
3755     // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
3756     set_mv_search_params(cpi);
3757
3758     vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
3759 #if CONFIG_VP9_TEMPORAL_DENOISING
3760     // Reset the denoiser on the resized frame.
3761     if (cpi->oxcf.noise_sensitivity > 0) {
3762       vp9_denoiser_free(&(cpi->denoiser));
3763       setup_denoiser_buffer(cpi);
3764       // Dynamic resize is only triggered for non-SVC, so we can force
3765       // golden frame update here as temporary fix to denoiser.
3766       cpi->refresh_golden_frame = 1;
3767     }
3768 #endif
3769   }
3770
3771   if ((oxcf->pass == 2) && !cpi->use_svc) {
3772     vp9_set_target_rate(cpi);
3773   }
3774
3775   alloc_frame_mvs(cm, cm->new_fb_idx);
3776
3777   // Reset the frame pointers to the current frame size.
3778   if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
3779                                cm->subsampling_x, cm->subsampling_y,
3780 #if CONFIG_VP9_HIGHBITDEPTH
3781                                cm->use_highbitdepth,
3782 #endif
3783                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
3784                                NULL, NULL, NULL))
3785     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3786                        "Failed to allocate frame buffer");
3787
3788   alloc_util_frame_buffers(cpi);
3789   init_motion_estimation(cpi);
3790
3791   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3792     RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
3793     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3794
3795     ref_buf->idx = buf_idx;
3796
3797     if (buf_idx != INVALID_IDX) {
3798       YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
3799       ref_buf->buf = buf;
3800 #if CONFIG_VP9_HIGHBITDEPTH
3801       vp9_setup_scale_factors_for_frame(
3802           &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
3803           cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
3804 #else
3805       vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
3806                                         buf->y_crop_height, cm->width,
3807                                         cm->height);
3808 #endif  // CONFIG_VP9_HIGHBITDEPTH
3809       if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
3810     } else {
3811       ref_buf->buf = NULL;
3812     }
3813   }
3814
3815   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
3816 }
3817
3818 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3819 static void save_encode_params(VP9_COMP *cpi) {
3820   VP9_COMMON *const cm = &cpi->common;
3821   const int tile_cols = 1 << cm->log2_tile_cols;
3822   const int tile_rows = 1 << cm->log2_tile_rows;
3823   int tile_col, tile_row;
3824   int i, j;
3825   RD_OPT *rd_opt = &cpi->rd;
3826   for (i = 0; i < MAX_REF_FRAMES; i++) {
3827     for (j = 0; j < REFERENCE_MODES; j++)
3828       rd_opt->prediction_type_threshes_prev[i][j] =
3829           rd_opt->prediction_type_threshes[i][j];
3830
3831     for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
3832       rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
3833   }
3834
3835   if (cpi->tile_data != NULL) {
3836     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
3837       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
3838         TileDataEnc *tile_data =
3839             &cpi->tile_data[tile_row * tile_cols + tile_col];
3840         for (i = 0; i < BLOCK_SIZES; ++i) {
3841           for (j = 0; j < MAX_MODES; ++j) {
3842             tile_data->thresh_freq_fact_prev[i][j] =
3843                 tile_data->thresh_freq_fact[i][j];
3844           }
3845         }
3846       }
3847   }
3848 }
3849 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3850
3851 static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
3852 #ifdef ENABLE_KF_DENOISE
3853   if (is_spatial_denoise_enabled(cpi)) {
3854     cpi->raw_source_frame = vp9_scale_if_required(
3855         cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3856         (oxcf->pass == 0), EIGHTTAP, 0);
3857   } else {
3858     cpi->raw_source_frame = cpi->Source;
3859   }
3860 #else
3861   cpi->raw_source_frame = cpi->Source;
3862 #endif
3863 }
3864
3865 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
3866                                       uint8_t *dest) {
3867   VP9_COMMON *const cm = &cpi->common;
3868   SVC *const svc = &cpi->svc;
3869   int q = 0, bottom_index = 0, top_index = 0;
3870   int no_drop_scene_change = 0;
3871   const INTERP_FILTER filter_scaler =
3872       (is_one_pass_cbr_svc(cpi))
3873           ? svc->downsample_filter_type[svc->spatial_layer_id]
3874           : EIGHTTAP;
3875   const int phase_scaler =
3876       (is_one_pass_cbr_svc(cpi))
3877           ? svc->downsample_filter_phase[svc->spatial_layer_id]
3878           : 0;
3879
3880   if (cm->show_existing_frame) {
3881     cpi->rc.this_frame_target = 0;
3882     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
3883     return 1;
3884   }
3885
3886   svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
3887
3888   // Flag to check if its valid to compute the source sad (used for
3889   // scene detection and for superblock content state in CBR mode).
3890   // The flag may get reset below based on SVC or resizing state.
3891   cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;
3892
3893   vpx_clear_system_state();
3894
3895   set_frame_size(cpi);
3896
3897   if (is_one_pass_cbr_svc(cpi) &&
3898       cpi->un_scaled_source->y_width == cm->width << 2 &&
3899       cpi->un_scaled_source->y_height == cm->height << 2 &&
3900       svc->scaled_temp.y_width == cm->width << 1 &&
3901       svc->scaled_temp.y_height == cm->height << 1) {
3902     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
3903     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
3904     // result will be saved in scaled_temp and might be used later.
3905     const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
3906     const int phase_scaler2 = svc->downsample_filter_phase[1];
3907     cpi->Source = vp9_svc_twostage_scale(
3908         cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
3909         filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
3910     svc->scaled_one_half = 1;
3911   } else if (is_one_pass_cbr_svc(cpi) &&
3912              cpi->un_scaled_source->y_width == cm->width << 1 &&
3913              cpi->un_scaled_source->y_height == cm->height << 1 &&
3914              svc->scaled_one_half) {
3915     // If the spatial layer is 1/2x1/2 and the scaling is already done in the
3916     // two-stage scaling, use the result directly.
3917     cpi->Source = &svc->scaled_temp;
3918     svc->scaled_one_half = 0;
3919   } else {
3920     cpi->Source = vp9_scale_if_required(
3921         cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
3922         filter_scaler, phase_scaler);
3923   }
3924 #ifdef OUTPUT_YUV_SVC_SRC
3925   // Write out at most 3 spatial layers.
3926   if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
3927     vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
3928   }
3929 #endif
3930   // Unfiltered raw source used in metrics calculation if the source
3931   // has been filtered.
3932   if (is_psnr_calc_enabled(cpi)) {
3933 #ifdef ENABLE_KF_DENOISE
3934     if (is_spatial_denoise_enabled(cpi)) {
3935       cpi->raw_source_frame = vp9_scale_if_required(
3936           cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3937           (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
3938     } else {
3939       cpi->raw_source_frame = cpi->Source;
3940     }
3941 #else
3942     cpi->raw_source_frame = cpi->Source;
3943 #endif
3944   }
3945
3946   if ((cpi->use_svc &&
3947        (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
3948         svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
3949         svc->current_superframe < 1)) ||
3950       cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
3951       cpi->resize_state != ORIG) {
3952     cpi->compute_source_sad_onepass = 0;
3953     if (cpi->content_state_sb_fd != NULL)
3954       memset(cpi->content_state_sb_fd, 0,
3955              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
3956                  sizeof(*cpi->content_state_sb_fd));
3957   }
3958
3959   // Avoid scaling last_source unless its needed.
3960   // Last source is needed if avg_source_sad() is used, or if
3961   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
3962   // estimation is enabled.
3963   if (cpi->unscaled_last_source != NULL &&
3964       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3965        (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
3966         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
3967        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
3968        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
3969        cpi->compute_source_sad_onepass))
3970     cpi->Last_Source = vp9_scale_if_required(
3971         cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
3972         (cpi->oxcf.pass == 0), EIGHTTAP, 0);
3973
3974   if (cpi->Last_Source == NULL ||
3975       cpi->Last_Source->y_width != cpi->Source->y_width ||
3976       cpi->Last_Source->y_height != cpi->Source->y_height)
3977     cpi->compute_source_sad_onepass = 0;
3978
3979   if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
3980     memset(cpi->consec_zero_mv, 0,
3981            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
3982   }
3983
3984 #if CONFIG_VP9_TEMPORAL_DENOISING
3985   if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
3986     vp9_denoiser_reset_on_first_frame(cpi);
3987 #endif
3988
3989   // Scene detection is always used for VBR mode or screen-content case.
3990   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
3991   // (need to check encoding time cost for doing this for speed 8).
3992   cpi->rc.high_source_sad = 0;
3993   cpi->rc.hybrid_intra_scene_change = 0;
3994   cpi->rc.re_encode_maxq_scene_change = 0;
3995   if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
3996       (cpi->oxcf.rc_mode == VPX_VBR ||
3997        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3998        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
3999     vp9_scene_detection_onepass(cpi);
4000
4001   if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
4002     svc->high_source_sad_superframe = cpi->rc.high_source_sad;
4003     svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
4004     // On scene change reset temporal layer pattern to TL0.
4005     // Note that if the base/lower spatial layers are skipped: instead of
4006     // inserting base layer here, we force max-q for the next superframe
4007     // with lower spatial layers: this is done in vp9_encodedframe_overshoot()
4008     // when max-q is decided for the current layer.
4009     // Only do this reset for bypass/flexible mode.
4010     if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
4011         svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
4012       // rc->high_source_sad will get reset so copy it to restore it.
4013       int tmp_high_source_sad = cpi->rc.high_source_sad;
4014       vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
4015       cpi->rc.high_source_sad = tmp_high_source_sad;
4016     }
4017   }
4018
4019   vp9_update_noise_estimate(cpi);
4020
4021   // For 1 pass CBR, check if we are dropping this frame.
4022   // Never drop on key frame, if base layer is key for svc,
4023   // on scene change, or if superframe has layer sync.
4024   if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
4025       !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
4026     no_drop_scene_change = 1;
4027   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
4028       !frame_is_intra_only(cm) && !no_drop_scene_change &&
4029       !svc->superframe_has_layer_sync &&
4030       (!cpi->use_svc ||
4031        !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
4032     if (vp9_rc_drop_frame(cpi)) return 0;
4033   }
4034
4035   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
4036   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
4037   // avoid this frame-level upsampling (for non intra_only frames).
4038   if (frame_is_intra_only(cm) == 0 &&
4039       !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) {
4040     vp9_scale_references(cpi);
4041   }
4042
4043   set_size_independent_vars(cpi);
4044   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4045
4046   // search method and step parameter might be changed in speed settings.
4047   init_motion_estimation(cpi);
4048
4049   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
4050
4051   if (cpi->sf.svc_use_lowres_part &&
4052       svc->spatial_layer_id == svc->number_spatial_layers - 2) {
4053     if (svc->prev_partition_svc == NULL) {
4054       CHECK_MEM_ERROR(
4055           cm, svc->prev_partition_svc,
4056           (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
4057                                    sizeof(*svc->prev_partition_svc)));
4058     }
4059   }
4060
4061   // TODO(jianj): Look into issue of skin detection with high bitdepth.
4062   if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
4063       cpi->oxcf.rc_mode == VPX_CBR &&
4064       cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
4065       cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4066     cpi->use_skin_detection = 1;
4067   }
4068
4069   // Enable post encode frame dropping for CBR on non key frame, when
4070   // ext_use_post_encode_drop is specified by user.
4071   cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
4072                                  cpi->oxcf.rc_mode == VPX_CBR &&
4073                                  cm->frame_type != KEY_FRAME;
4074
4075   vp9_set_quantizer(cpi, q);
4076   vp9_set_variance_partition_thresholds(cpi, q, 0);
4077
4078   setup_frame(cpi);
4079
4080   suppress_active_map(cpi);
4081
4082   if (cpi->use_svc) {
4083     // On non-zero spatial layer, check for disabling inter-layer
4084     // prediction.
4085     if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
4086     vp9_svc_assert_constraints_pattern(cpi);
4087   }
4088
4089   if (cpi->rc.last_post_encode_dropped_scene_change) {
4090     cpi->rc.high_source_sad = 1;
4091     svc->high_source_sad_superframe = 1;
4092     // For now disable use_source_sad since Last_Source will not be the previous
4093     // encoded but the dropped one.
4094     cpi->sf.use_source_sad = 0;
4095     cpi->rc.last_post_encode_dropped_scene_change = 0;
4096   }
4097   // Check if this high_source_sad (scene/slide change) frame should be
4098   // encoded at high/max QP, and if so, set the q and adjust some rate
4099   // control parameters.
4100   if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
4101       (cpi->rc.high_source_sad ||
4102        (cpi->use_svc && svc->high_source_sad_superframe))) {
4103     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
4104       vp9_set_quantizer(cpi, q);
4105       vp9_set_variance_partition_thresholds(cpi, q, 0);
4106     }
4107   }
4108
4109 #if !CONFIG_REALTIME_ONLY
4110   // Variance adaptive and in frame q adjustment experiments are mutually
4111   // exclusive.
4112   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
4113     vp9_vaq_frame_setup(cpi);
4114   } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
4115     vp9_360aq_frame_setup(cpi);
4116   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
4117     vp9_setup_in_frame_q_adj(cpi);
4118   } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
4119     // it may be pretty bad for rate-control,
4120     // and I should handle it somehow
4121     vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4122   } else {
4123 #endif
4124     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4125       vp9_cyclic_refresh_setup(cpi);
4126     } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
4127       apply_roi_map(cpi);
4128     }
4129 #if !CONFIG_REALTIME_ONLY
4130   }
4131 #endif
4132
4133   apply_active_map(cpi);
4134
4135   vp9_encode_frame(cpi);
4136
4137   // Check if we should re-encode this frame at high Q because of high
4138   // overshoot based on the encoded frame size. Only for frames where
4139   // high temporal-source SAD is detected.
4140   // For SVC: all spatial layers are checked for re-encoding.
4141   if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
4142       (cpi->rc.high_source_sad ||
4143        (cpi->use_svc && svc->high_source_sad_superframe))) {
4144     int frame_size = 0;
4145     // Get an estimate of the encoded frame size.
4146     save_coding_context(cpi);
4147     vp9_pack_bitstream(cpi, dest, size);
4148     restore_coding_context(cpi);
4149     frame_size = (int)(*size) << 3;
4150     // Check if encoded frame will overshoot too much, and if so, set the q and
4151     // adjust some rate control parameters, and return to re-encode the frame.
4152     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
4153       vpx_clear_system_state();
4154       vp9_set_quantizer(cpi, q);
4155       vp9_set_variance_partition_thresholds(cpi, q, 0);
4156       suppress_active_map(cpi);
4157       // Turn-off cyclic refresh for re-encoded frame.
4158       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4159         CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
4160         unsigned char *const seg_map = cpi->segmentation_map;
4161         memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
4162         memset(cr->last_coded_q_map, MAXQ,
4163                cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
4164         cr->sb_index = 0;
4165         vp9_disable_segmentation(&cm->seg);
4166       }
4167       apply_active_map(cpi);
4168       vp9_encode_frame(cpi);
4169     }
4170   }
4171
4172   // Update some stats from cyclic refresh, and check for golden frame update.
4173   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
4174       !frame_is_intra_only(cm))
4175     vp9_cyclic_refresh_postencode(cpi);
4176
4177   // Update the skip mb flag probabilities based on the distribution
4178   // seen in the last encoder iteration.
4179   // update_base_skip_probs(cpi);
4180   vpx_clear_system_state();
4181   return 1;
4182 }
4183
4184 #if !CONFIG_REALTIME_ONLY
4185 #define MAX_QSTEP_ADJ 4
4186 static int get_qstep_adj(int rate_excess, int rate_limit) {
4187   int qstep =
4188       rate_limit ? ((rate_excess + rate_limit / 2) / rate_limit) : INT_MAX;
4189   return VPXMIN(qstep, MAX_QSTEP_ADJ);
4190 }
4191
4192 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
4193                                     uint8_t *dest) {
4194   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4195   VP9_COMMON *const cm = &cpi->common;
4196   RATE_CONTROL *const rc = &cpi->rc;
4197   int bottom_index, top_index;
4198   int loop_count = 0;
4199   int loop_at_this_size = 0;
4200   int loop = 0;
4201   int overshoot_seen = 0;
4202   int undershoot_seen = 0;
4203   int frame_over_shoot_limit;
4204   int frame_under_shoot_limit;
4205   int q = 0, q_low = 0, q_high = 0;
4206   int enable_acl;
4207 #ifdef AGGRESSIVE_VBR
4208   int qrange_adj = 1;
4209 #endif
4210
4211   if (cm->show_existing_frame) {
4212     rc->this_frame_target = 0;
4213     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
4214     return;
4215   }
4216
4217   set_size_independent_vars(cpi);
4218
4219   enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
4220                                        (cpi->twopass.gf_group.index == 1)
4221                                  : 0;
4222
4223   do {
4224     vpx_clear_system_state();
4225
4226     set_frame_size(cpi);
4227
4228     if (loop_count == 0 || cpi->resize_pending != 0) {
4229       set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4230
4231 #ifdef AGGRESSIVE_VBR
4232       if (two_pass_first_group_inter(cpi)) {
4233         // Adjustment limits for min and max q
4234         qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2);
4235
4236         bottom_index =
4237             VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q);
4238         top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2);
4239       }
4240 #endif
4241       // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
4242       set_mv_search_params(cpi);
4243
4244       // Reset the loop state for new frame size.
4245       overshoot_seen = 0;
4246       undershoot_seen = 0;
4247
4248       // Reconfiguration for change in frame size has concluded.
4249       cpi->resize_pending = 0;
4250
4251       q_low = bottom_index;
4252       q_high = top_index;
4253
4254       loop_at_this_size = 0;
4255     }
4256
4257     // Decide frame size bounds first time through.
4258     if (loop_count == 0) {
4259       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
4260                                        &frame_under_shoot_limit,
4261                                        &frame_over_shoot_limit);
4262     }
4263
4264     cpi->Source =
4265         vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
4266                               (oxcf->pass == 0), EIGHTTAP, 0);
4267
4268     // Unfiltered raw source used in metrics calculation if the source
4269     // has been filtered.
4270     if (is_psnr_calc_enabled(cpi)) {
4271 #ifdef ENABLE_KF_DENOISE
4272       if (is_spatial_denoise_enabled(cpi)) {
4273         cpi->raw_source_frame = vp9_scale_if_required(
4274             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
4275             (oxcf->pass == 0), EIGHTTAP, 0);
4276       } else {
4277         cpi->raw_source_frame = cpi->Source;
4278       }
4279 #else
4280       cpi->raw_source_frame = cpi->Source;
4281 #endif
4282     }
4283
4284     if (cpi->unscaled_last_source != NULL)
4285       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
4286                                                &cpi->scaled_last_source,
4287                                                (oxcf->pass == 0), EIGHTTAP, 0);
4288
4289     if (frame_is_intra_only(cm) == 0) {
4290       if (loop_count > 0) {
4291         release_scaled_references(cpi);
4292       }
4293       vp9_scale_references(cpi);
4294     }
4295
4296 #if CONFIG_RATE_CTRL
4297     // TODO(angiebird): This is a hack for making sure the encoder use the
4298     // external_quantize_index exactly. Avoid this kind of hack later.
4299     if (cpi->encode_command.use_external_quantize_index) {
4300       q = cpi->encode_command.external_quantize_index;
4301     }
4302 #endif
4303
4304     vp9_set_quantizer(cpi, q);
4305
4306     if (loop_count == 0) setup_frame(cpi);
4307
4308     // Variance adaptive and in frame q adjustment experiments are mutually
4309     // exclusive.
4310     if (oxcf->aq_mode == VARIANCE_AQ) {
4311       vp9_vaq_frame_setup(cpi);
4312     } else if (oxcf->aq_mode == EQUATOR360_AQ) {
4313       vp9_360aq_frame_setup(cpi);
4314     } else if (oxcf->aq_mode == COMPLEXITY_AQ) {
4315       vp9_setup_in_frame_q_adj(cpi);
4316     } else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
4317       vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4318     } else if (oxcf->aq_mode == PSNR_AQ) {
4319       vp9_psnr_aq_mode_setup(&cm->seg);
4320     }
4321
4322     vp9_encode_frame(cpi);
4323
4324     // Update the skip mb flag probabilities based on the distribution
4325     // seen in the last encoder iteration.
4326     // update_base_skip_probs(cpi);
4327
4328     vpx_clear_system_state();
4329
4330     // Dummy pack of the bitstream using up to date stats to get an
4331     // accurate estimate of output frame size to determine if we need
4332     // to recode.
4333     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4334       save_coding_context(cpi);
4335       if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
4336
4337       rc->projected_frame_size = (int)(*size) << 3;
4338
4339       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
4340     }
4341
4342 #if CONFIG_RATE_CTRL
4343     // This part needs to be after save_coding_context() because
4344     // restore_coding_context will be called in the end of this function.
4345     // TODO(angiebird): This is a hack for making sure the encoder use the
4346     // external_quantize_index exactly. Avoid this kind of hack later.
4347     if (cpi->encode_command.use_external_quantize_index) {
4348       break;
4349     }
4350 #endif
4351
4352     if (oxcf->rc_mode == VPX_Q) {
4353       loop = 0;
4354     } else {
4355       if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
4356           (rc->projected_frame_size < rc->max_frame_bandwidth)) {
4357         int last_q = q;
4358         int64_t kf_err;
4359
4360         int64_t high_err_target = cpi->ambient_err;
4361         int64_t low_err_target = cpi->ambient_err >> 1;
4362
4363 #if CONFIG_VP9_HIGHBITDEPTH
4364         if (cm->use_highbitdepth) {
4365           kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4366         } else {
4367           kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4368         }
4369 #else
4370         kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4371 #endif  // CONFIG_VP9_HIGHBITDEPTH
4372
4373         // Prevent possible divide by zero error below for perfect KF
4374         kf_err += !kf_err;
4375
4376         // The key frame is not good enough or we can afford
4377         // to make it better without undue risk of popping.
4378         if ((kf_err > high_err_target &&
4379              rc->projected_frame_size <= frame_over_shoot_limit) ||
4380             (kf_err > low_err_target &&
4381              rc->projected_frame_size <= frame_under_shoot_limit)) {
4382           // Lower q_high
4383           q_high = q > q_low ? q - 1 : q_low;
4384
4385           // Adjust Q
4386           q = (int)((q * high_err_target) / kf_err);
4387           q = VPXMIN(q, (q_high + q_low) >> 1);
4388         } else if (kf_err < low_err_target &&
4389                    rc->projected_frame_size >= frame_under_shoot_limit) {
4390           // The key frame is much better than the previous frame
4391           // Raise q_low
4392           q_low = q < q_high ? q + 1 : q_high;
4393
4394           // Adjust Q
4395           q = (int)((q * low_err_target) / kf_err);
4396           q = VPXMIN(q, (q_high + q_low + 1) >> 1);
4397         }
4398
4399         // Clamp Q to upper and lower limits:
4400         q = clamp(q, q_low, q_high);
4401
4402         loop = q != last_q;
4403       } else if (recode_loop_test(cpi, frame_over_shoot_limit,
4404                                   frame_under_shoot_limit, q,
4405                                   VPXMAX(q_high, top_index), bottom_index)) {
4406         // Is the projected frame size out of range and are we allowed
4407         // to attempt to recode.
4408         int last_q = q;
4409         int retries = 0;
4410         int qstep;
4411
4412         if (cpi->resize_pending == 1) {
4413           // Change in frame size so go back around the recode loop.
4414           cpi->rc.frame_size_selector =
4415               SCALE_STEP1 - cpi->rc.frame_size_selector;
4416           cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
4417
4418 #if CONFIG_INTERNAL_STATS
4419           ++cpi->tot_recode_hits;
4420 #endif
4421           ++loop_count;
4422           loop = 1;
4423           continue;
4424         }
4425
4426         // Frame size out of permitted range:
4427         // Update correction factor & compute new Q to try...
4428
4429         // Frame is too large
4430         if (rc->projected_frame_size > rc->this_frame_target) {
4431           // Special case if the projected size is > the max allowed.
4432           if ((q == q_high) &&
4433               ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
4434                (!rc->is_src_frame_alt_ref &&
4435                 (rc->projected_frame_size >=
4436                  big_rate_miss_high_threshold(cpi))))) {
4437             int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
4438                                             big_rate_miss_high_threshold(cpi)));
4439             double q_val_high;
4440             q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
4441             q_val_high =
4442                 q_val_high * ((double)rc->projected_frame_size / max_rate);
4443             q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
4444             q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
4445           }
4446
4447           // Raise Qlow as to at least the current value
4448           qstep =
4449               get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
4450           q_low = VPXMIN(q + qstep, q_high);
4451
4452           if (undershoot_seen || loop_at_this_size > 1) {
4453             // Update rate_correction_factor unless
4454             vp9_rc_update_rate_correction_factors(cpi);
4455
4456             q = (q_high + q_low + 1) / 2;
4457           } else {
4458             // Update rate_correction_factor unless
4459             vp9_rc_update_rate_correction_factors(cpi);
4460
4461             q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4462                                   VPXMAX(q_high, top_index));
4463
4464             while (q < q_low && retries < 10) {
4465               vp9_rc_update_rate_correction_factors(cpi);
4466               q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4467                                     VPXMAX(q_high, top_index));
4468               retries++;
4469             }
4470           }
4471
4472           overshoot_seen = 1;
4473         } else {
4474           // Frame is too small
4475           qstep =
4476               get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
4477           q_high = VPXMAX(q - qstep, q_low);
4478
4479           if (overshoot_seen || loop_at_this_size > 1) {
4480             vp9_rc_update_rate_correction_factors(cpi);
4481             q = (q_high + q_low) / 2;
4482           } else {
4483             vp9_rc_update_rate_correction_factors(cpi);
4484             q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4485                                   VPXMIN(q_low, bottom_index), top_index);
4486             // Special case reset for qlow for constrained quality.
4487             // This should only trigger where there is very substantial
4488             // undershoot on a frame and the auto cq level is above
4489             // the user passed in value.
4490             if (oxcf->rc_mode == VPX_CQ && q < q_low) {
4491               q_low = q;
4492             }
4493
4494             while (q > q_high && retries < 10) {
4495               vp9_rc_update_rate_correction_factors(cpi);
4496               q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4497                                     VPXMIN(q_low, bottom_index), top_index);
4498               retries++;
4499             }
4500           }
4501           undershoot_seen = 1;
4502         }
4503
4504         // Clamp Q to upper and lower limits:
4505         q = clamp(q, q_low, q_high);
4506
4507         loop = (q != last_q);
4508       } else {
4509         loop = 0;
4510       }
4511     }
4512
4513     // Special case for overlay frame.
4514     if (rc->is_src_frame_alt_ref &&
4515         rc->projected_frame_size < rc->max_frame_bandwidth)
4516       loop = 0;
4517
4518     if (loop) {
4519       ++loop_count;
4520       ++loop_at_this_size;
4521
4522 #if CONFIG_INTERNAL_STATS
4523       ++cpi->tot_recode_hits;
4524 #endif
4525     }
4526
4527     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
4528       if (loop) restore_coding_context(cpi);
4529   } while (loop);
4530
4531 #ifdef AGGRESSIVE_VBR
4532   if (two_pass_first_group_inter(cpi)) {
4533     cpi->twopass.active_worst_quality =
4534         VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
4535   } else if (!frame_is_kf_gf_arf(cpi)) {
4536 #else
4537   if (!frame_is_kf_gf_arf(cpi)) {
4538 #endif
4539     // Have we been forced to adapt Q outside the expected range by an extreme
4540     // rate miss. If so adjust the active maxQ for the subsequent frames.
4541     if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
4542       cpi->twopass.active_worst_quality = q;
4543     } else if (oxcf->vbr_corpus_complexity && q == q_low &&
4544                rc->projected_frame_size < rc->this_frame_target) {
4545       cpi->twopass.active_worst_quality =
4546           VPXMAX(q, cpi->twopass.active_worst_quality - 1);
4547     }
4548   }
4549
4550   if (enable_acl) {
4551     // Skip recoding, if model diff is below threshold
4552     const int thresh = compute_context_model_thresh(cpi);
4553     const int diff = compute_context_model_diff(cm);
4554     if (diff >= thresh) {
4555       vp9_encode_frame(cpi);
4556     }
4557   }
4558   if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4559     vpx_clear_system_state();
4560     restore_coding_context(cpi);
4561   }
4562 }
4563 #endif  // !CONFIG_REALTIME_ONLY
4564
4565 static int get_ref_frame_flags(const VP9_COMP *cpi) {
4566   const int *const map = cpi->common.ref_frame_map;
4567   const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
4568   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
4569   const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
4570   int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
4571
4572   if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
4573
4574   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
4575       (cpi->svc.number_temporal_layers == 1 &&
4576        cpi->svc.number_spatial_layers == 1))
4577     flags &= ~VP9_GOLD_FLAG;
4578
4579   if (alt_is_last) flags &= ~VP9_ALT_FLAG;
4580
4581   if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
4582
4583   return flags;
4584 }
4585
4586 static void set_ext_overrides(VP9_COMP *cpi) {
4587   // Overrides the defaults with the externally supplied values with
4588   // vp9_update_reference() and vp9_update_entropy() calls
4589   // Note: The overrides are valid only for the next frame passed
4590   // to encode_frame_to_data_rate() function
4591   if (cpi->ext_refresh_frame_context_pending) {
4592     cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
4593     cpi->ext_refresh_frame_context_pending = 0;
4594   }
4595   if (cpi->ext_refresh_frame_flags_pending) {
4596     cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
4597     cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
4598     cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
4599   }
4600 }
4601
4602 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
4603     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4604     YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
4605     int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) {
4606   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4607       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4608 #if CONFIG_VP9_HIGHBITDEPTH
4609     if (cm->bit_depth == VPX_BITS_8) {
4610       vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4611                                  phase_scaler2);
4612       vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type,
4613                                  phase_scaler);
4614     } else {
4615       scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
4616                              filter_type2, phase_scaler2);
4617       scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
4618                              filter_type, phase_scaler);
4619     }
4620 #else
4621     vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4622                                phase_scaler2);
4623     vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler);
4624 #endif  // CONFIG_VP9_HIGHBITDEPTH
4625     return scaled;
4626   } else {
4627     return unscaled;
4628   }
4629 }
4630
4631 YV12_BUFFER_CONFIG *vp9_scale_if_required(
4632     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4633     int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) {
4634   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4635       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4636 #if CONFIG_VP9_HIGHBITDEPTH
4637     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4638         unscaled->y_height <= (scaled->y_height << 1))
4639       if (cm->bit_depth == VPX_BITS_8)
4640         vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4641       else
4642         scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
4643                                filter_type, phase_scaler);
4644     else
4645       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
4646 #else
4647     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4648         unscaled->y_height <= (scaled->y_height << 1))
4649       vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4650     else
4651       scale_and_extend_frame_nonnormative(unscaled, scaled);
4652 #endif  // CONFIG_VP9_HIGHBITDEPTH
4653     return scaled;
4654   } else {
4655     return unscaled;
4656   }
4657 }
4658
4659 static void set_ref_sign_bias(VP9_COMP *cpi) {
4660   VP9_COMMON *const cm = &cpi->common;
4661   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4662   const int cur_frame_index = ref_buffer->frame_index;
4663   MV_REFERENCE_FRAME ref_frame;
4664
4665   for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
4666     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
4667     const RefCntBuffer *const ref_cnt_buf =
4668         get_ref_cnt_buffer(&cpi->common, buf_idx);
4669     if (ref_cnt_buf) {
4670       cm->ref_frame_sign_bias[ref_frame] =
4671           cur_frame_index < ref_cnt_buf->frame_index;
4672     }
4673   }
4674 }
4675
4676 static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
4677   INTERP_FILTER ifilter;
4678   int ref_total[MAX_REF_FRAMES] = { 0 };
4679   MV_REFERENCE_FRAME ref;
4680   int mask = 0;
4681   if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
4682     return mask;
4683   for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
4684     for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
4685       ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
4686
4687   for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
4688     if ((ref_total[LAST_FRAME] &&
4689          cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
4690         (ref_total[GOLDEN_FRAME] == 0 ||
4691          cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
4692              ref_total[GOLDEN_FRAME]) &&
4693         (ref_total[ALTREF_FRAME] == 0 ||
4694          cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
4695              ref_total[ALTREF_FRAME]))
4696       mask |= 1 << ifilter;
4697   }
4698   return mask;
4699 }
4700
4701 #ifdef ENABLE_KF_DENOISE
4702 // Baseline kernel weights for denoise
4703 static uint8_t dn_kernal_3[9] = { 1, 2, 1, 2, 4, 2, 1, 2, 1 };
4704 static uint8_t dn_kernal_5[25] = { 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 4,
4705                                    2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1 };
4706
4707 static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
4708                                      uint8_t point_weight, int *sum_val,
4709                                      int *sum_weight) {
4710   if (abs(centre_val - data_val) <= thresh) {
4711     *sum_weight += point_weight;
4712     *sum_val += (int)data_val * (int)point_weight;
4713   }
4714 }
4715
4716 static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
4717                                   const int strength) {
4718   int sum_weight = 0;
4719   int sum_val = 0;
4720   int thresh = strength;
4721   int kernal_size = 5;
4722   int half_k_size = 2;
4723   int i, j;
4724   int max_diff = 0;
4725   uint8_t *tmp_ptr;
4726   uint8_t *kernal_ptr;
4727
4728   // Find the maximum deviation from the source point in the locale.
4729   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4730   for (i = 0; i < kernal_size + 2; ++i) {
4731     for (j = 0; j < kernal_size + 2; ++j) {
4732       max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
4733     }
4734     tmp_ptr += stride;
4735   }
4736
4737   // Select the kernel size.
4738   if (max_diff > (strength + (strength >> 1))) {
4739     kernal_size = 3;
4740     half_k_size = 1;
4741     thresh = thresh >> 1;
4742   }
4743   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4744
4745   // Apply the kernel
4746   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4747   for (i = 0; i < kernal_size; ++i) {
4748     for (j = 0; j < kernal_size; ++j) {
4749       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4750                         &sum_val, &sum_weight);
4751       ++kernal_ptr;
4752     }
4753     tmp_ptr += stride;
4754   }
4755
4756   // Update the source value with the new filtered value
4757   *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4758 }
4759
4760 #if CONFIG_VP9_HIGHBITDEPTH
4761 static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
4762                                          const int strength) {
4763   int sum_weight = 0;
4764   int sum_val = 0;
4765   int thresh = strength;
4766   int kernal_size = 5;
4767   int half_k_size = 2;
4768   int i, j;
4769   int max_diff = 0;
4770   uint16_t *tmp_ptr;
4771   uint8_t *kernal_ptr;
4772
4773   // Find the maximum deviation from the source point in the locale.
4774   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4775   for (i = 0; i < kernal_size + 2; ++i) {
4776     for (j = 0; j < kernal_size + 2; ++j) {
4777       max_diff = VPXMAX(max_diff, abs((int)src_ptr - (int)tmp_ptr[j]));
4778     }
4779     tmp_ptr += stride;
4780   }
4781
4782   // Select the kernel size.
4783   if (max_diff > (strength + (strength >> 1))) {
4784     kernal_size = 3;
4785     half_k_size = 1;
4786     thresh = thresh >> 1;
4787   }
4788   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4789
4790   // Apply the kernel
4791   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4792   for (i = 0; i < kernal_size; ++i) {
4793     for (j = 0; j < kernal_size; ++j) {
4794       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4795                         &sum_val, &sum_weight);
4796       ++kernal_ptr;
4797     }
4798     tmp_ptr += stride;
4799   }
4800
4801   // Update the source value with the new filtered value
4802   *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4803 }
4804 #endif  // CONFIG_VP9_HIGHBITDEPTH
4805
4806 // Apply thresholded spatial noise suppression to a given buffer.
4807 static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
4808                                    const int stride, const int width,
4809                                    const int height, const int strength) {
4810   VP9_COMMON *const cm = &cpi->common;
4811   uint8_t *src_ptr = buffer;
4812   int row;
4813   int col;
4814
4815   for (row = 0; row < height; ++row) {
4816     for (col = 0; col < width; ++col) {
4817 #if CONFIG_VP9_HIGHBITDEPTH
4818       if (cm->use_highbitdepth)
4819         highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]), stride,
4820                                      strength);
4821       else
4822         spatial_denoise_point(&src_ptr[col], stride, strength);
4823 #else
4824       spatial_denoise_point(&src_ptr[col], stride, strength);
4825 #endif  // CONFIG_VP9_HIGHBITDEPTH
4826     }
4827     src_ptr += stride;
4828   }
4829 }
4830
4831 // Apply thresholded spatial noise suppression to source.
4832 static void spatial_denoise_frame(VP9_COMP *cpi) {
4833   YV12_BUFFER_CONFIG *src = cpi->Source;
4834   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4835   TWO_PASS *const twopass = &cpi->twopass;
4836   VP9_COMMON *const cm = &cpi->common;
4837
4838   // Base the filter strength on the current active max Q.
4839   const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
4840                                               cm->bit_depth));
4841   int strength =
4842       VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));
4843
4844   // Denoise each of Y,U and V buffers.
4845   spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
4846                          src->y_height, strength);
4847
4848   strength += (strength >> 1);
4849   spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
4850                          src->uv_height, strength << 1);
4851
4852   spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
4853                          src->uv_height, strength << 1);
4854 }
4855 #endif  // ENABLE_KF_DENOISE
4856
4857 #if !CONFIG_REALTIME_ONLY
4858 static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
4859                                          uint8_t *dest) {
4860   if (cpi->common.seg.enabled)
4861     if (ALT_REF_AQ_PROTECT_GAIN) {
4862       size_t nsize = *size;
4863       int overhead;
4864
4865       // TODO(yuryg): optimize this, as
4866       // we don't really need to repack
4867
4868       save_coding_context(cpi);
4869       vp9_disable_segmentation(&cpi->common.seg);
4870       vp9_pack_bitstream(cpi, dest, &nsize);
4871       restore_coding_context(cpi);
4872
4873       overhead = (int)*size - (int)nsize;
4874
4875       if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
4876         vp9_encode_frame(cpi);
4877       else
4878         vp9_enable_segmentation(&cpi->common.seg);
4879     }
4880 }
4881 #endif
4882
4883 static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
4884   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4885
4886   if (ref_buffer) {
4887     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
4888     ref_buffer->frame_index =
4889         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
4890   }
4891 }
4892
4893 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
4894   VP9_COMMON *cm = &cpi->common;
4895   ThreadData *td = &cpi->td;
4896   MACROBLOCK *x = &td->mb;
4897   MACROBLOCKD *xd = &x->e_mbd;
4898   uint8_t *y_buffer = cpi->Source->y_buffer;
4899   const int y_stride = cpi->Source->y_stride;
4900   const int block_size = BLOCK_16X16;
4901
4902   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
4903   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
4904   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
4905   const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
4906   double log_sum = 0.0;
4907   int row, col;
4908
4909   // Loop through each 64x64 block.
4910   for (row = 0; row < num_rows; ++row) {
4911     for (col = 0; col < num_cols; ++col) {
4912       int mi_row, mi_col;
4913       double var = 0.0, num_of_var = 0.0;
4914       const int index = row * num_cols + col;
4915
4916       for (mi_row = row * num_8x8_h;
4917            mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
4918         for (mi_col = col * num_8x8_w;
4919              mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w; ++mi_col) {
4920           struct buf_2d buf;
4921           const int row_offset_y = mi_row << 3;
4922           const int col_offset_y = mi_col << 3;
4923
4924           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
4925           buf.stride = y_stride;
4926
4927           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
4928           // and high bit videos, the variance needs to be divided by 2.0 or
4929           // 64.0 separately.
4930           // TODO(sdeng): need to tune for 12bit videos.
4931 #if CONFIG_VP9_HIGHBITDEPTH
4932           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
4933             var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
4934           else
4935 #endif
4936             var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
4937
4938           num_of_var += 1.0;
4939         }
4940       }
4941       var = var / num_of_var / 64.0;
4942
4943       // Curve fitting with an exponential model on all 16x16 blocks from the
4944       // Midres dataset.
4945       var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
4946       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
4947       log_sum += log(var);
4948     }
4949   }
4950   log_sum = exp(log_sum / (double)(num_rows * num_cols));
4951
4952   for (row = 0; row < num_rows; ++row) {
4953     for (col = 0; col < num_cols; ++col) {
4954       const int index = row * num_cols + col;
4955       cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
4956     }
4957   }
4958
4959   (void)xd;
4960 }
4961
4962 // Process the wiener variance in 16x16 block basis.
4963 static int qsort_comp(const void *elem1, const void *elem2) {
4964   int a = *((const int *)elem1);
4965   int b = *((const int *)elem2);
4966   if (a > b) return 1;
4967   if (a < b) return -1;
4968   return 0;
4969 }
4970
4971 static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
4972   VP9_COMMON *cm = &cpi->common;
4973
4974   if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows &&
4975       cpi->mb_wiener_var_cols >= cm->mb_cols)
4976     return;
4977
4978   vpx_free(cpi->mb_wiener_variance);
4979   cpi->mb_wiener_variance = NULL;
4980
4981   CHECK_MEM_ERROR(
4982       cm, cpi->mb_wiener_variance,
4983       vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
4984   cpi->mb_wiener_var_rows = cm->mb_rows;
4985   cpi->mb_wiener_var_cols = cm->mb_cols;
4986 }
4987
4988 static void set_mb_wiener_variance(VP9_COMP *cpi) {
4989   VP9_COMMON *cm = &cpi->common;
4990   uint8_t *buffer = cpi->Source->y_buffer;
4991   int buf_stride = cpi->Source->y_stride;
4992
4993 #if CONFIG_VP9_HIGHBITDEPTH
4994   ThreadData *td = &cpi->td;
4995   MACROBLOCK *x = &td->mb;
4996   MACROBLOCKD *xd = &x->e_mbd;
4997   DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
4998   DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
4999   uint8_t *zero_pred;
5000 #else
5001   DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
5002 #endif
5003
5004   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
5005   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
5006
5007   int mb_row, mb_col, count = 0;
5008   // Hard coded operating block size
5009   const int block_size = 16;
5010   const int coeff_count = block_size * block_size;
5011   const TX_SIZE tx_size = TX_16X16;
5012
5013 #if CONFIG_VP9_HIGHBITDEPTH
5014   xd->cur_buf = cpi->Source;
5015   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5016     zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
5017     memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);
5018   } else {
5019     zero_pred = zero_pred8;
5020     memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);
5021   }
5022 #else
5023   memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
5024 #endif
5025
5026   cpi->norm_wiener_variance = 0;
5027
5028   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
5029     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
5030       int idx;
5031       int16_t median_val = 0;
5032       uint8_t *mb_buffer =
5033           buffer + mb_row * block_size * buf_stride + mb_col * block_size;
5034       int64_t wiener_variance = 0;
5035
5036 #if CONFIG_VP9_HIGHBITDEPTH
5037       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5038         vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
5039                                   mb_buffer, buf_stride, zero_pred, block_size,
5040                                   xd->bd);
5041         highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5042       } else {
5043         vpx_subtract_block(block_size, block_size, src_diff, block_size,
5044                            mb_buffer, buf_stride, zero_pred, block_size);
5045         wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5046       }
5047 #else
5048       vpx_subtract_block(block_size, block_size, src_diff, block_size,
5049                          mb_buffer, buf_stride, zero_pred, block_size);
5050       wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5051 #endif  // CONFIG_VP9_HIGHBITDEPTH
5052
5053       coeff[0] = 0;
5054       for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]);
5055
5056       qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp);
5057
5058       // Noise level estimation
5059       median_val = coeff[coeff_count / 2];
5060
5061       // Wiener filter
5062       for (idx = 1; idx < coeff_count; ++idx) {
5063         int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
5064         int64_t tmp_coeff = (int64_t)coeff[idx];
5065         if (median_val) {
5066           tmp_coeff = (sqr_coeff * coeff[idx]) /
5067                       (sqr_coeff + (int64_t)median_val * median_val);
5068         }
5069         wiener_variance += tmp_coeff * tmp_coeff;
5070       }
5071       cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
5072           wiener_variance / coeff_count;
5073       cpi->norm_wiener_variance +=
5074           cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col];
5075       ++count;
5076     }
5077   }
5078
5079   if (count) cpi->norm_wiener_variance /= count;
5080   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
5081 }
5082
5083 static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
5084                                       uint8_t *dest,
5085                                       unsigned int *frame_flags) {
5086   VP9_COMMON *const cm = &cpi->common;
5087   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5088   struct segmentation *const seg = &cm->seg;
5089   TX_SIZE t;
5090
5091   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
5092   // No need to set svc.skip_enhancement_layer if whole superframe will be
5093   // dropped.
5094   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
5095       cpi->oxcf.target_bandwidth == 0 &&
5096       !(cpi->svc.framedrop_mode != LAYER_DROP &&
5097         (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
5098          cpi->svc
5099              .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
5100                                                 1]) &&
5101         cpi->svc.drop_spatial_layer[0])) {
5102     cpi->svc.skip_enhancement_layer = 1;
5103     vp9_rc_postencode_update_drop_frame(cpi);
5104     cpi->ext_refresh_frame_flags_pending = 0;
5105     cpi->last_frame_dropped = 1;
5106     cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
5107     cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
5108     vp9_inc_frame_in_layer(cpi);
5109     return;
5110   }
5111
5112   set_ext_overrides(cpi);
5113   vpx_clear_system_state();
5114
5115 #ifdef ENABLE_KF_DENOISE
5116   // Spatial denoise of key frame.
5117   if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
5118 #endif
5119
5120   if (cm->show_existing_frame == 0) {
5121     // Update frame index
5122     set_frame_index(cpi, cm);
5123
5124     // Set the arf sign bias for this frame.
5125     set_ref_sign_bias(cpi);
5126   }
5127
5128   // Set default state for segment based loop filter update flags.
5129   cm->lf.mode_ref_delta_update = 0;
5130
5131   if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
5132     cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);
5133
5134   // Set various flags etc to special state if it is a key frame.
5135   if (frame_is_intra_only(cm)) {
5136     // Reset the loop filter deltas and segmentation map.
5137     vp9_reset_segment_features(&cm->seg);
5138
5139     // If segmentation is enabled force a map update for key frames.
5140     if (seg->enabled) {
5141       seg->update_map = 1;
5142       seg->update_data = 1;
5143     }
5144
5145     // The alternate reference frame cannot be active for a key frame.
5146     cpi->rc.source_alt_ref_active = 0;
5147
5148     cm->error_resilient_mode = oxcf->error_resilient_mode;
5149     cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
5150
5151     // By default, encoder assumes decoder can use prev_mi.
5152     if (cm->error_resilient_mode) {
5153       cm->frame_parallel_decoding_mode = 1;
5154       cm->reset_frame_context = 0;
5155       cm->refresh_frame_context = 0;
5156     } else if (cm->intra_only) {
5157       // Only reset the current context.
5158       cm->reset_frame_context = 2;
5159     }
5160   }
5161
5162   if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);
5163
5164   if (oxcf->aq_mode == PERCEPTUAL_AQ) {
5165     init_mb_wiener_var_buffer(cpi);
5166     set_mb_wiener_variance(cpi);
5167   }
5168
5169   vpx_clear_system_state();
5170
5171 #if CONFIG_INTERNAL_STATS
5172   memset(cpi->mode_chosen_counts, 0,
5173          MAX_MODES * sizeof(*cpi->mode_chosen_counts));
5174 #endif
5175 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5176   // Backup to ensure consistency between recodes
5177   save_encode_params(cpi);
5178 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5179
5180   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
5181     if (!encode_without_recode_loop(cpi, size, dest)) return;
5182   } else {
5183 #if !CONFIG_REALTIME_ONLY
5184     encode_with_recode_loop(cpi, size, dest);
5185 #endif
5186   }
5187
5188   // TODO(jingning): When using show existing frame mode, we assume that the
5189   // current ARF will be directly used as the final reconstructed frame. This is
5190   // an encoder control scheme. One could in principle explore other
5191   // possibilities to arrange the reference frame buffer and their coding order.
5192   if (cm->show_existing_frame) {
5193     ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
5194                cm->ref_frame_map[cpi->alt_fb_idx]);
5195   }
5196
5197 #if !CONFIG_REALTIME_ONLY
5198   // Disable segmentation if it decrease rate/distortion ratio
5199   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5200     vp9_try_disable_lookahead_aq(cpi, size, dest);
5201 #endif
5202
5203 #if CONFIG_VP9_TEMPORAL_DENOISING
5204 #ifdef OUTPUT_YUV_DENOISED
5205   if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
5206     vpx_write_yuv_frame(yuv_denoised_file,
5207                         &cpi->denoiser.running_avg_y[INTRA_FRAME]);
5208   }
5209 #endif
5210 #endif
5211 #ifdef OUTPUT_YUV_SKINMAP
5212   if (cpi->common.current_video_frame > 1) {
5213     vp9_output_skin_map(cpi, yuv_skinmap_file);
5214   }
5215 #endif
5216
5217   // Special case code to reduce pulsing when key frames are forced at a
5218   // fixed interval. Note the reconstruction error if it is the frame before
5219   // the force key frame
5220   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
5221 #if CONFIG_VP9_HIGHBITDEPTH
5222     if (cm->use_highbitdepth) {
5223       cpi->ambient_err =
5224           vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5225     } else {
5226       cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5227     }
5228 #else
5229     cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5230 #endif  // CONFIG_VP9_HIGHBITDEPTH
5231   }
5232
5233   // If the encoder forced a KEY_FRAME decision
5234   if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;
5235
5236   cm->frame_to_show = get_frame_new_buffer(cm);
5237   cm->frame_to_show->color_space = cm->color_space;
5238   cm->frame_to_show->color_range = cm->color_range;
5239   cm->frame_to_show->render_width = cm->render_width;
5240   cm->frame_to_show->render_height = cm->render_height;
5241
5242   // Pick the loop filter level for the frame.
5243   loopfilter_frame(cpi, cm);
5244
5245   if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
5246
5247   // build the bitstream
5248   vp9_pack_bitstream(cpi, dest, size);
5249
5250   if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
5251       cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
5252     restore_coding_context(cpi);
5253     return;
5254   }
5255
5256   cpi->last_frame_dropped = 0;
5257   cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
5258   if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
5259     cpi->svc.num_encoded_top_layer++;
5260
5261   // Keep track of the frame buffer index updated/refreshed for the
5262   // current encoded TL0 superframe.
5263   if (cpi->svc.temporal_layer_id == 0) {
5264     if (cpi->refresh_last_frame)
5265       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
5266     else if (cpi->refresh_golden_frame)
5267       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
5268     else if (cpi->refresh_alt_ref_frame)
5269       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
5270   }
5271
5272   if (cm->seg.update_map) update_reference_segmentation_map(cpi);
5273
5274   if (frame_is_intra_only(cm) == 0) {
5275     release_scaled_references(cpi);
5276   }
5277   vp9_update_reference_frames(cpi);
5278
5279   if (!cm->show_existing_frame) {
5280     for (t = TX_4X4; t <= TX_32X32; ++t) {
5281       full_to_model_counts(cpi->td.counts->coef[t],
5282                            cpi->td.rd_counts.coef_counts[t]);
5283     }
5284
5285     if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
5286       if (!frame_is_intra_only(cm)) {
5287         vp9_adapt_mode_probs(cm);
5288         vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
5289       }
5290       vp9_adapt_coef_probs(cm);
5291     }
5292   }
5293
5294   cpi->ext_refresh_frame_flags_pending = 0;
5295
5296   if (cpi->refresh_golden_frame == 1)
5297     cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
5298   else
5299     cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
5300
5301   if (cpi->refresh_alt_ref_frame == 1)
5302     cpi->frame_flags |= FRAMEFLAGS_ALTREF;
5303   else
5304     cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
5305
5306   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
5307
5308   cm->last_frame_type = cm->frame_type;
5309
5310   vp9_rc_postencode_update(cpi, *size);
5311
5312   *size = VPXMAX(1, *size);
5313
5314 #if 0
5315   output_frame_level_debug_stats(cpi);
5316 #endif
5317
5318   if (cm->frame_type == KEY_FRAME) {
5319     // Tell the caller that the frame was coded as a key frame
5320     *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
5321   } else {
5322     *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
5323   }
5324
5325   // Clear the one shot update flags for segmentation map and mode/ref loop
5326   // filter deltas.
5327   cm->seg.update_map = 0;
5328   cm->seg.update_data = 0;
5329   cm->lf.mode_ref_delta_update = 0;
5330
5331   // keep track of the last coded dimensions
5332   cm->last_width = cm->width;
5333   cm->last_height = cm->height;
5334
5335   // reset to normal state now that we are done.
5336   if (!cm->show_existing_frame) {
5337     cm->last_show_frame = cm->show_frame;
5338     cm->prev_frame = cm->cur_frame;
5339   }
5340
5341   if (cm->show_frame) {
5342     vp9_swap_mi_and_prev_mi(cm);
5343     // Don't increment frame counters if this was an altref buffer
5344     // update not a real frame
5345     update_frame_indexes(cm, cm->show_frame);
5346     if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
5347   }
5348
5349   if (cpi->use_svc) {
5350     cpi->svc
5351         .layer_context[cpi->svc.spatial_layer_id *
5352                            cpi->svc.number_temporal_layers +
5353                        cpi->svc.temporal_layer_id]
5354         .last_frame_type = cm->frame_type;
5355     // Reset layer_sync back to 0 for next frame.
5356     cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
5357   }
5358
5359   cpi->force_update_segmentation = 0;
5360
5361 #if !CONFIG_REALTIME_ONLY
5362   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5363     vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
5364 #endif
5365
5366   cpi->svc.previous_frame_is_intra_only = cm->intra_only;
5367   cpi->svc.set_intra_only_frame = 0;
5368 }
5369
5370 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5371                       unsigned int *frame_flags) {
5372   vp9_rc_get_svc_params(cpi);
5373   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
5374 }
5375
5376 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5377                         unsigned int *frame_flags) {
5378   if (cpi->oxcf.rc_mode == VPX_CBR) {
5379     vp9_rc_get_one_pass_cbr_params(cpi);
5380   } else {
5381     vp9_rc_get_one_pass_vbr_params(cpi);
5382   }
5383   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
5384 }
5385
5386 #if !CONFIG_REALTIME_ONLY
5387 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5388                         unsigned int *frame_flags) {
5389   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
5390 #if CONFIG_MISMATCH_DEBUG
5391   mismatch_move_frame_idx_w();
5392 #endif
5393   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
5394 }
5395 #endif  // !CONFIG_REALTIME_ONLY
5396
5397 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
5398                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
5399                           int64_t end_time) {
5400   VP9_COMMON *const cm = &cpi->common;
5401   struct vpx_usec_timer timer;
5402   int res = 0;
5403   const int subsampling_x = sd->subsampling_x;
5404   const int subsampling_y = sd->subsampling_y;
5405 #if CONFIG_VP9_HIGHBITDEPTH
5406   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
5407 #else
5408   const int use_highbitdepth = 0;
5409 #endif
5410
5411   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
5412 #if CONFIG_VP9_TEMPORAL_DENOISING
5413   setup_denoiser_buffer(cpi);
5414 #endif
5415
5416   alloc_raw_frame_buffers(cpi);
5417
5418   vpx_usec_timer_start(&timer);
5419
5420   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
5421                          use_highbitdepth, frame_flags))
5422     res = -1;
5423   vpx_usec_timer_mark(&timer);
5424   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
5425
5426   if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
5427       (subsampling_x != 1 || subsampling_y != 1)) {
5428     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5429                        "Non-4:2:0 color format requires profile 1 or 3");
5430     res = -1;
5431   }
5432   if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
5433       (subsampling_x == 1 && subsampling_y == 1)) {
5434     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5435                        "4:2:0 color format requires profile 0 or 2");
5436     res = -1;
5437   }
5438
5439   return res;
5440 }
5441
5442 static int frame_is_reference(const VP9_COMP *cpi) {
5443   const VP9_COMMON *cm = &cpi->common;
5444
5445   return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
5446          cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame ||
5447          cm->refresh_frame_context || cm->lf.mode_ref_delta_update ||
5448          cm->seg.update_map || cm->seg.update_data;
5449 }
5450
5451 static void adjust_frame_rate(VP9_COMP *cpi,
5452                               const struct lookahead_entry *source) {
5453   int64_t this_duration;
5454   int step = 0;
5455
5456   if (source->ts_start == cpi->first_time_stamp_ever) {
5457     this_duration = source->ts_end - source->ts_start;
5458     step = 1;
5459   } else {
5460     int64_t last_duration =
5461         cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
5462
5463     this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
5464
5465     // do a step update if the duration changes by 10%
5466     if (last_duration)
5467       step = (int)((this_duration - last_duration) * 10 / last_duration);
5468   }
5469
5470   if (this_duration) {
5471     if (step) {
5472       vp9_new_framerate(cpi, 10000000.0 / this_duration);
5473     } else {
5474       // Average this frame's rate into the last second's average
5475       // frame rate. If we haven't seen 1 second yet, then average
5476       // over the whole interval seen.
5477       const double interval = VPXMIN(
5478           (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
5479       double avg_duration = 10000000.0 / cpi->framerate;
5480       avg_duration *= (interval - avg_duration + this_duration);
5481       avg_duration /= interval;
5482
5483       vp9_new_framerate(cpi, 10000000.0 / avg_duration);
5484     }
5485   }
5486   cpi->last_time_stamp_seen = source->ts_start;
5487   cpi->last_end_time_stamp_seen = source->ts_end;
5488 }
5489
5490 // Returns 0 if this is not an alt ref else the offset of the source frame
5491 // used as the arf midpoint.
5492 static int get_arf_src_index(VP9_COMP *cpi) {
5493   RATE_CONTROL *const rc = &cpi->rc;
5494   int arf_src_index = 0;
5495   if (is_altref_enabled(cpi)) {
5496     if (cpi->oxcf.pass == 2) {
5497       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5498       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
5499         arf_src_index = gf_group->arf_src_offset[gf_group->index];
5500       }
5501     } else if (rc->source_alt_ref_pending) {
5502       arf_src_index = rc->frames_till_gf_update_due;
5503     }
5504   }
5505   return arf_src_index;
5506 }
5507
5508 static void check_src_altref(VP9_COMP *cpi,
5509                              const struct lookahead_entry *source) {
5510   RATE_CONTROL *const rc = &cpi->rc;
5511
5512   if (cpi->oxcf.pass == 2) {
5513     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5514     rc->is_src_frame_alt_ref =
5515         (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
5516   } else {
5517     rc->is_src_frame_alt_ref =
5518         cpi->alt_ref_source && (source == cpi->alt_ref_source);
5519   }
5520
5521   if (rc->is_src_frame_alt_ref) {
5522     // Current frame is an ARF overlay frame.
5523     cpi->alt_ref_source = NULL;
5524
5525     // Don't refresh the last buffer for an ARF overlay frame. It will
5526     // become the GF so preserve last as an alternative prediction option.
5527     cpi->refresh_last_frame = 0;
5528   }
5529 }
5530
5531 #if CONFIG_INTERNAL_STATS
5532 static void adjust_image_stat(double y, double u, double v, double all,
5533                               ImageStat *s) {
5534   s->stat[Y] += y;
5535   s->stat[U] += u;
5536   s->stat[V] += v;
5537   s->stat[ALL] += all;
5538   s->worst = VPXMIN(s->worst, all);
5539 }
5540 #endif  // CONFIG_INTERNAL_STATS
5541
5542 // Adjust the maximum allowable frame size for the target level.
5543 static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) {
5544   RATE_CONTROL *const rc = &cpi->rc;
5545   LevelConstraint *const ls = &cpi->level_constraint;
5546   VP9_COMMON *const cm = &cpi->common;
5547   const double max_cpb_size = ls->max_cpb_size;
5548   vpx_clear_system_state();
5549   rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size);
5550   if (frame_is_intra_only(cm)) {
5551     rc->max_frame_bandwidth =
5552         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5));
5553   } else if (arf_src_index > 0) {
5554     rc->max_frame_bandwidth =
5555         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4));
5556   } else {
5557     rc->max_frame_bandwidth =
5558         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2));
5559   }
5560 }
5561
5562 static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
5563   VP9_COMMON *const cm = &cpi->common;
5564   Vp9LevelInfo *const level_info = &cpi->level_info;
5565   Vp9LevelSpec *const level_spec = &level_info->level_spec;
5566   Vp9LevelStats *const level_stats = &level_info->level_stats;
5567   int i, idx;
5568   uint64_t luma_samples, dur_end;
5569   const uint32_t luma_pic_size = cm->width * cm->height;
5570   const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
5571   LevelConstraint *const level_constraint = &cpi->level_constraint;
5572   const int8_t level_index = level_constraint->level_index;
5573   double cpb_data_size;
5574
5575   vpx_clear_system_state();
5576
5577   // update level_stats
5578   level_stats->total_compressed_size += *size;
5579   if (cm->show_frame) {
5580     level_stats->total_uncompressed_size +=
5581         luma_pic_size +
5582         2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
5583     level_stats->time_encoded =
5584         (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
5585         (double)TICKS_PER_SEC;
5586   }
5587
5588   if (arf_src_index > 0) {
5589     if (!level_stats->seen_first_altref) {
5590       level_stats->seen_first_altref = 1;
5591     } else if (level_stats->frames_since_last_altref <
5592                level_spec->min_altref_distance) {
5593       level_spec->min_altref_distance = level_stats->frames_since_last_altref;
5594     }
5595     level_stats->frames_since_last_altref = 0;
5596   } else {
5597     ++level_stats->frames_since_last_altref;
5598   }
5599
5600   if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
5601     idx = (level_stats->frame_window_buffer.start +
5602            level_stats->frame_window_buffer.len++) %
5603           FRAME_WINDOW_SIZE;
5604   } else {
5605     idx = level_stats->frame_window_buffer.start;
5606     level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
5607   }
5608   level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
5609   level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
5610   level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
5611
5612   if (cm->frame_type == KEY_FRAME) {
5613     level_stats->ref_refresh_map = 0;
5614   } else {
5615     int count = 0;
5616     level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
5617     // Also need to consider the case where the encoder refers to a buffer
5618     // that has been implicitly refreshed after encoding a keyframe.
5619     if (!cm->intra_only) {
5620       level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
5621       level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
5622       level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
5623     }
5624     for (i = 0; i < REF_FRAMES; ++i) {
5625       count += (level_stats->ref_refresh_map >> i) & 1;
5626     }
5627     if (count > level_spec->max_ref_frame_buffers) {
5628       level_spec->max_ref_frame_buffers = count;
5629     }
5630   }
5631
5632   // update average_bitrate
5633   level_spec->average_bitrate = (double)level_stats->total_compressed_size /
5634                                 125.0 / level_stats->time_encoded;
5635
5636   // update max_luma_sample_rate
5637   luma_samples = 0;
5638   for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
5639     idx = (level_stats->frame_window_buffer.start +
5640            level_stats->frame_window_buffer.len - 1 - i) %
5641           FRAME_WINDOW_SIZE;
5642     if (i == 0) {
5643       dur_end = level_stats->frame_window_buffer.buf[idx].ts;
5644     }
5645     if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
5646         TICKS_PER_SEC) {
5647       break;
5648     }
5649     luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
5650   }
5651   if (luma_samples > level_spec->max_luma_sample_rate) {
5652     level_spec->max_luma_sample_rate = luma_samples;
5653   }
5654
5655   // update max_cpb_size
5656   cpb_data_size = 0;
5657   for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
5658     if (i >= level_stats->frame_window_buffer.len) break;
5659     idx = (level_stats->frame_window_buffer.start +
5660            level_stats->frame_window_buffer.len - 1 - i) %
5661           FRAME_WINDOW_SIZE;
5662     cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5663   }
5664   cpb_data_size = cpb_data_size / 125.0;
5665   if (cpb_data_size > level_spec->max_cpb_size) {
5666     level_spec->max_cpb_size = cpb_data_size;
5667   }
5668
5669   // update max_luma_picture_size
5670   if (luma_pic_size > level_spec->max_luma_picture_size) {
5671     level_spec->max_luma_picture_size = luma_pic_size;
5672   }
5673
5674   // update max_luma_picture_breadth
5675   if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
5676     level_spec->max_luma_picture_breadth = luma_pic_breadth;
5677   }
5678
5679   // update compression_ratio
5680   level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
5681                                   cm->bit_depth /
5682                                   level_stats->total_compressed_size / 8.0;
5683
5684   // update max_col_tiles
5685   if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
5686     level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
5687   }
5688
5689   if (level_index >= 0 && level_constraint->fail_flag == 0) {
5690     if (level_spec->max_luma_picture_size >
5691         vp9_level_defs[level_index].max_luma_picture_size) {
5692       level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
5693       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5694                          "Failed to encode to the target level %d. %s",
5695                          vp9_level_defs[level_index].level,
5696                          level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
5697     }
5698
5699     if (level_spec->max_luma_picture_breadth >
5700         vp9_level_defs[level_index].max_luma_picture_breadth) {
5701       level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
5702       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5703                          "Failed to encode to the target level %d. %s",
5704                          vp9_level_defs[level_index].level,
5705                          level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
5706     }
5707
5708     if ((double)level_spec->max_luma_sample_rate >
5709         (double)vp9_level_defs[level_index].max_luma_sample_rate *
5710             (1 + SAMPLE_RATE_GRACE_P)) {
5711       level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
5712       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5713                          "Failed to encode to the target level %d. %s",
5714                          vp9_level_defs[level_index].level,
5715                          level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
5716     }
5717
5718     if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
5719       level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
5720       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5721                          "Failed to encode to the target level %d. %s",
5722                          vp9_level_defs[level_index].level,
5723                          level_fail_messages[TOO_MANY_COLUMN_TILE]);
5724     }
5725
5726     if (level_spec->min_altref_distance <
5727         vp9_level_defs[level_index].min_altref_distance) {
5728       level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
5729       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5730                          "Failed to encode to the target level %d. %s",
5731                          vp9_level_defs[level_index].level,
5732                          level_fail_messages[ALTREF_DIST_TOO_SMALL]);
5733     }
5734
5735     if (level_spec->max_ref_frame_buffers >
5736         vp9_level_defs[level_index].max_ref_frame_buffers) {
5737       level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
5738       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5739                          "Failed to encode to the target level %d. %s",
5740                          vp9_level_defs[level_index].level,
5741                          level_fail_messages[TOO_MANY_REF_BUFFER]);
5742     }
5743
5744     if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
5745       level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
5746       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5747                          "Failed to encode to the target level %d. %s",
5748                          vp9_level_defs[level_index].level,
5749                          level_fail_messages[CPB_TOO_LARGE]);
5750     }
5751
5752     // Set an upper bound for the next frame size. It will be used in
5753     // level_rc_framerate() before encoding the next frame.
5754     cpb_data_size = 0;
5755     for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
5756       if (i >= level_stats->frame_window_buffer.len) break;
5757       idx = (level_stats->frame_window_buffer.start +
5758              level_stats->frame_window_buffer.len - 1 - i) %
5759             FRAME_WINDOW_SIZE;
5760       cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5761     }
5762     cpb_data_size = cpb_data_size / 125.0;
5763     level_constraint->max_frame_size =
5764         (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
5765               1000.0);
5766     if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
5767       level_constraint->max_frame_size >>= 1;
5768   }
5769 }
5770
5771 typedef struct GF_PICTURE {
5772   YV12_BUFFER_CONFIG *frame;
5773   int ref_frame[3];
5774   FRAME_UPDATE_TYPE update_type;
5775 } GF_PICTURE;
5776
5777 static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
5778                             const GF_GROUP *gf_group, int *tpl_group_frames) {
5779   VP9_COMMON *cm = &cpi->common;
5780   int frame_idx = 0;
5781   int i;
5782   int gld_index = -1;
5783   int alt_index = -1;
5784   int lst_index = -1;
5785   int arf_index_stack[MAX_ARF_LAYERS];
5786   int arf_stack_size = 0;
5787   int extend_frame_count = 0;
5788   int pframe_qindex = cpi->tpl_stats[2].base_qindex;
5789   int frame_gop_offset = 0;
5790
5791   RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
5792   int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
5793
5794   memset(recon_frame_index, -1, sizeof(recon_frame_index));
5795   stack_init(arf_index_stack, MAX_ARF_LAYERS);
5796
5797   // TODO(jingning): To be used later for gf frame type parsing.
5798   (void)gf_group;
5799
5800   for (i = 0; i < FRAME_BUFFERS; ++i) {
5801     if (frame_bufs[i].ref_count == 0) {
5802       alloc_frame_mvs(cm, i);
5803       if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
5804                                    cm->subsampling_x, cm->subsampling_y,
5805 #if CONFIG_VP9_HIGHBITDEPTH
5806                                    cm->use_highbitdepth,
5807 #endif
5808                                    VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
5809                                    NULL, NULL, NULL))
5810         vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
5811                            "Failed to allocate frame buffer");
5812
5813       recon_frame_index[frame_idx] = i;
5814       ++frame_idx;
5815
5816       if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
5817     }
5818   }
5819
5820   for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
5821     assert(recon_frame_index[i] >= 0);
5822     cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
5823   }
5824
5825   *tpl_group_frames = 0;
5826
5827   // Initialize Golden reference frame.
5828   gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
5829   for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
5830   gf_picture[0].update_type = gf_group->update_type[0];
5831   gld_index = 0;
5832   ++*tpl_group_frames;
5833
5834   // Initialize base layer ARF frame
5835   gf_picture[1].frame = cpi->Source;
5836   gf_picture[1].ref_frame[0] = gld_index;
5837   gf_picture[1].ref_frame[1] = lst_index;
5838   gf_picture[1].ref_frame[2] = alt_index;
5839   gf_picture[1].update_type = gf_group->update_type[1];
5840   alt_index = 1;
5841   ++*tpl_group_frames;
5842
5843   // Initialize P frames
5844   for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
5845     struct lookahead_entry *buf;
5846     frame_gop_offset = gf_group->frame_gop_index[frame_idx];
5847     buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
5848
5849     if (buf == NULL) break;
5850
5851     gf_picture[frame_idx].frame = &buf->img;
5852     gf_picture[frame_idx].ref_frame[0] = gld_index;
5853     gf_picture[frame_idx].ref_frame[1] = lst_index;
5854     gf_picture[frame_idx].ref_frame[2] = alt_index;
5855     gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];
5856
5857     switch (gf_group->update_type[frame_idx]) {
5858       case ARF_UPDATE:
5859         stack_push(arf_index_stack, alt_index, arf_stack_size);
5860         ++arf_stack_size;
5861         alt_index = frame_idx;
5862         break;
5863       case LF_UPDATE: lst_index = frame_idx; break;
5864       case OVERLAY_UPDATE:
5865         gld_index = frame_idx;
5866         alt_index = stack_pop(arf_index_stack, arf_stack_size);
5867         --arf_stack_size;
5868         break;
5869       case USE_BUF_FRAME:
5870         lst_index = alt_index;
5871         alt_index = stack_pop(arf_index_stack, arf_stack_size);
5872         --arf_stack_size;
5873         break;
5874       default: break;
5875     }
5876
5877     ++*tpl_group_frames;
5878
5879     // The length of group of pictures is baseline_gf_interval, plus the
5880     // beginning golden frame from last GOP, plus the last overlay frame in
5881     // the same GOP.
5882     if (frame_idx == gf_group->gf_group_size) break;
5883   }
5884
5885   alt_index = -1;
5886   ++frame_idx;
5887   ++frame_gop_offset;
5888
5889   // Extend two frames outside the current gf group.
5890   for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
5891     struct lookahead_entry *buf =
5892         vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
5893
5894     if (buf == NULL) break;
5895
5896     cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
5897
5898     gf_picture[frame_idx].frame = &buf->img;
5899     gf_picture[frame_idx].ref_frame[0] = gld_index;
5900     gf_picture[frame_idx].ref_frame[1] = lst_index;
5901     gf_picture[frame_idx].ref_frame[2] = alt_index;
5902     gf_picture[frame_idx].update_type = LF_UPDATE;
5903     lst_index = frame_idx;
5904     ++*tpl_group_frames;
5905     ++extend_frame_count;
5906     ++frame_gop_offset;
5907   }
5908 }
5909
5910 static void init_tpl_stats(VP9_COMP *cpi) {
5911   int frame_idx;
5912   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
5913     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
5914     memset(tpl_frame->tpl_stats_ptr, 0,
5915            tpl_frame->height * tpl_frame->width *
5916                sizeof(*tpl_frame->tpl_stats_ptr));
5917     tpl_frame->is_valid = 0;
5918   }
5919 }
5920
5921 #if CONFIG_NON_GREEDY_MV
5922 static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
5923                                          MotionField *motion_field,
5924                                          int frame_idx, uint8_t *cur_frame_buf,
5925                                          uint8_t *ref_frame_buf, int stride,
5926                                          BLOCK_SIZE bsize, int mi_row,
5927                                          int mi_col, MV *mv) {
5928   MACROBLOCK *const x = &td->mb;
5929   MACROBLOCKD *const xd = &x->e_mbd;
5930   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5931   int step_param;
5932   uint32_t bestsme = UINT_MAX;
5933   const MvLimits tmp_mv_limits = x->mv_limits;
5934   // lambda is used to adjust the importance of motion vector consistency.
5935   // TODO(angiebird): Figure out lambda's proper value.
5936   const int lambda = cpi->tpl_stats[frame_idx].lambda;
5937   int_mv nb_full_mvs[NB_MVS_NUM];
5938   int nb_full_mv_num;
5939
5940   MV best_ref_mv1 = { 0, 0 };
5941   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
5942
5943   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
5944   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
5945
5946   // Setup frame pointers
5947   x->plane[0].src.buf = cur_frame_buf;
5948   x->plane[0].src.stride = stride;
5949   xd->plane[0].pre[0].buf = ref_frame_buf;
5950   xd->plane[0].pre[0].stride = stride;
5951
5952   step_param = mv_sf->reduce_first_step_size;
5953   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
5954
5955   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
5956
5957   nb_full_mv_num =
5958       vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
5959   vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
5960                              lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
5961
5962   /* restore UMV window */
5963   x->mv_limits = tmp_mv_limits;
5964
5965   return bestsme;
5966 }
5967
5968 static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
5969                                         uint8_t *cur_frame_buf,
5970                                         uint8_t *ref_frame_buf, int stride,
5971                                         BLOCK_SIZE bsize, MV *mv) {
5972   MACROBLOCK *const x = &td->mb;
5973   MACROBLOCKD *const xd = &x->e_mbd;
5974   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5975   uint32_t bestsme = UINT_MAX;
5976   uint32_t distortion;
5977   uint32_t sse;
5978   int cost_list[5];
5979
5980   MV best_ref_mv1 = { 0, 0 };
5981
5982   // Setup frame pointers
5983   x->plane[0].src.buf = cur_frame_buf;
5984   x->plane[0].src.stride = stride;
5985   xd->plane[0].pre[0].buf = ref_frame_buf;
5986   xd->plane[0].pre[0].stride = stride;
5987
5988   // TODO(yunqing): may use higher tap interp filter than 2 taps.
5989   // Ignore mv costing by sending NULL pointer instead of cost array
5990   bestsme = cpi->find_fractional_mv_step(
5991       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
5992       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
5993       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
5994       USE_2_TAPS);
5995
5996   return bestsme;
5997 }
5998
5999 #else  // CONFIG_NON_GREEDY_MV
6000 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
6001                                               uint8_t *cur_frame_buf,
6002                                               uint8_t *ref_frame_buf,
6003                                               int stride, BLOCK_SIZE bsize,
6004                                               MV *mv) {
6005   MACROBLOCK *const x = &td->mb;
6006   MACROBLOCKD *const xd = &x->e_mbd;
6007   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6008   const SEARCH_METHODS search_method = NSTEP;
6009   int step_param;
6010   int sadpb = x->sadperbit16;
6011   uint32_t bestsme = UINT_MAX;
6012   uint32_t distortion;
6013   uint32_t sse;
6014   int cost_list[5];
6015   const MvLimits tmp_mv_limits = x->mv_limits;
6016
6017   MV best_ref_mv1 = { 0, 0 };
6018   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6019
6020   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6021   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6022
6023   // Setup frame pointers
6024   x->plane[0].src.buf = cur_frame_buf;
6025   x->plane[0].src.stride = stride;
6026   xd->plane[0].pre[0].buf = ref_frame_buf;
6027   xd->plane[0].pre[0].stride = stride;
6028
6029   step_param = mv_sf->reduce_first_step_size;
6030   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6031
6032   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6033
6034   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
6035                         search_method, sadpb, cond_cost_list(cpi, cost_list),
6036                         &best_ref_mv1, mv, 0, 0);
6037
6038   /* restore UMV window */
6039   x->mv_limits = tmp_mv_limits;
6040
6041   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6042   // Ignore mv costing by sending NULL pointer instead of cost array
6043   bestsme = cpi->find_fractional_mv_step(
6044       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6045       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6046       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6047       USE_2_TAPS);
6048
6049   return bestsme;
6050 }
6051 #endif
6052
6053 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
6054                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
6055   int width = 0, height = 0;
6056   int bw = 4 << b_width_log2_lookup[bsize];
6057   int bh = 4 << b_height_log2_lookup[bsize];
6058
6059   switch (block) {
6060     case 0:
6061       width = grid_pos_col + bw - ref_pos_col;
6062       height = grid_pos_row + bh - ref_pos_row;
6063       break;
6064     case 1:
6065       width = ref_pos_col + bw - grid_pos_col;
6066       height = grid_pos_row + bh - ref_pos_row;
6067       break;
6068     case 2:
6069       width = grid_pos_col + bw - ref_pos_col;
6070       height = ref_pos_row + bh - grid_pos_row;
6071       break;
6072     case 3:
6073       width = ref_pos_col + bw - grid_pos_col;
6074       height = ref_pos_row + bh - grid_pos_row;
6075       break;
6076     default: assert(0);
6077   }
6078
6079   return width * height;
6080 }
6081
6082 static int round_floor(int ref_pos, int bsize_pix) {
6083   int round;
6084   if (ref_pos < 0)
6085     round = -(1 + (-ref_pos - 1) / bsize_pix);
6086   else
6087     round = ref_pos / bsize_pix;
6088
6089   return round;
6090 }
6091
6092 static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
6093                             BLOCK_SIZE bsize, int stride) {
6094   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6095   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6096   const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
6097   int idx, idy;
6098
6099   for (idy = 0; idy < mi_height; ++idy) {
6100     for (idx = 0; idx < mi_width; ++idx) {
6101       TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
6102       const int64_t mc_flow = tpl_ptr->mc_flow;
6103       const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
6104       *tpl_ptr = *src_stats;
6105       tpl_ptr->mc_flow = mc_flow;
6106       tpl_ptr->mc_ref_cost = mc_ref_cost;
6107       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
6108     }
6109   }
6110 }
6111
6112 static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6113                                int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6114   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
6115   TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
6116   MV mv = tpl_stats->mv.as_mv;
6117   int mv_row = mv.row >> 3;
6118   int mv_col = mv.col >> 3;
6119
6120   int ref_pos_row = mi_row * MI_SIZE + mv_row;
6121   int ref_pos_col = mi_col * MI_SIZE + mv_col;
6122
6123   const int bw = 4 << b_width_log2_lookup[bsize];
6124   const int bh = 4 << b_height_log2_lookup[bsize];
6125   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6126   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6127   const int pix_num = bw * bh;
6128
6129   // top-left on grid block location in pixel
6130   int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
6131   int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
6132   int block;
6133
6134   for (block = 0; block < 4; ++block) {
6135     int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
6136     int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
6137
6138     if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
6139         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
6140       int overlap_area = get_overlap_area(
6141           grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
6142       int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
6143       int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
6144
6145       int64_t mc_flow = tpl_stats->mc_dep_cost -
6146                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
6147                             tpl_stats->intra_cost;
6148
6149       int idx, idy;
6150
6151       for (idy = 0; idy < mi_height; ++idy) {
6152         for (idx = 0; idx < mi_width; ++idx) {
6153           TplDepStats *des_stats =
6154               &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
6155                          (ref_mi_col + idx)];
6156
6157           des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
6158           des_stats->mc_ref_cost +=
6159               ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
6160               pix_num;
6161           assert(overlap_area >= 0);
6162         }
6163       }
6164     }
6165   }
6166 }
6167
6168 static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6169                              int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6170   int idx, idy;
6171   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6172   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6173
6174   for (idy = 0; idy < mi_height; ++idy) {
6175     for (idx = 0; idx < mi_width; ++idx) {
6176       TplDepStats *tpl_ptr =
6177           &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
6178       tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
6179                          BLOCK_8X8);
6180     }
6181   }
6182 }
6183
6184 static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
6185                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
6186                                TX_SIZE tx_size, int64_t *recon_error,
6187                                int64_t *sse) {
6188   MACROBLOCKD *const xd = &x->e_mbd;
6189   const struct macroblock_plane *const p = &x->plane[plane];
6190   const struct macroblockd_plane *const pd = &xd->plane[plane];
6191   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
6192   uint16_t eob;
6193   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
6194   const int shift = tx_size == TX_32X32 ? 0 : 2;
6195
6196 #if CONFIG_VP9_HIGHBITDEPTH
6197   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6198     vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6199                                  p->quant_fp, qcoeff, dqcoeff, pd->dequant,
6200                                  &eob, scan_order->scan, scan_order->iscan);
6201   } else {
6202     vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6203                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
6204                           scan_order->scan, scan_order->iscan);
6205   }
6206 #else
6207   vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
6208                         qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
6209                         scan_order->iscan);
6210 #endif  // CONFIG_VP9_HIGHBITDEPTH
6211
6212   *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
6213   *recon_error = VPXMAX(*recon_error, 1);
6214
6215   *sse = (*sse) >> shift;
6216   *sse = VPXMAX(*sse, 1);
6217 }
6218
6219 #if CONFIG_VP9_HIGHBITDEPTH
6220 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6221                          TX_SIZE tx_size) {
6222   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
6223   switch (tx_size) {
6224     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
6225     case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
6226     case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
6227     default: assert(0);
6228   }
6229 }
6230 #endif  // CONFIG_VP9_HIGHBITDEPTH
6231
6232 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6233                   TX_SIZE tx_size) {
6234   switch (tx_size) {
6235     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
6236     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
6237     case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
6238     default: assert(0);
6239   }
6240 }
6241
6242 static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
6243                           int mi_col) {
6244   x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6245   x->mv_limits.row_max =
6246       (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
6247   x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6248   x->mv_limits.col_max =
6249       ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
6250 }
6251
6252 static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
6253                             struct scale_factors *sf, GF_PICTURE *gf_picture,
6254                             int frame_idx, TplDepFrame *tpl_frame,
6255                             int16_t *src_diff, tran_low_t *coeff,
6256                             tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
6257                             int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
6258                             YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
6259                             int64_t *recon_error, int64_t *sse) {
6260   VP9_COMMON *cm = &cpi->common;
6261   ThreadData *td = &cpi->td;
6262
6263   const int bw = 4 << b_width_log2_lookup[bsize];
6264   const int bh = 4 << b_height_log2_lookup[bsize];
6265   const int pix_num = bw * bh;
6266   int best_rf_idx = -1;
6267   int_mv best_mv;
6268   int64_t best_inter_cost = INT64_MAX;
6269   int64_t inter_cost;
6270   int rf_idx;
6271   const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
6272
6273   int64_t best_intra_cost = INT64_MAX;
6274   int64_t intra_cost;
6275   PREDICTION_MODE mode;
6276   int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6277   MODE_INFO mi_above, mi_left;
6278   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6279   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6280   TplDepStats *tpl_stats =
6281       &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6282
6283   xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
6284   xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
6285   xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
6286   xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
6287   xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
6288   xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
6289
6290   // Intra prediction search
6291   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
6292     uint8_t *src, *dst;
6293     int src_stride, dst_stride;
6294
6295     src = xd->cur_buf->y_buffer + mb_y_offset;
6296     src_stride = xd->cur_buf->y_stride;
6297
6298     dst = &predictor[0];
6299     dst_stride = bw;
6300
6301     xd->mi[0]->sb_type = bsize;
6302     xd->mi[0]->ref_frame[0] = INTRA_FRAME;
6303
6304     vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
6305                             src_stride, dst, dst_stride, 0, 0, 0);
6306
6307 #if CONFIG_VP9_HIGHBITDEPTH
6308     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6309       vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6310                                 dst_stride, xd->bd);
6311       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6312       intra_cost = vpx_highbd_satd(coeff, pix_num);
6313     } else {
6314       vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6315                          dst_stride);
6316       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6317       intra_cost = vpx_satd(coeff, pix_num);
6318     }
6319 #else
6320     vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
6321     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6322     intra_cost = vpx_satd(coeff, pix_num);
6323 #endif  // CONFIG_VP9_HIGHBITDEPTH
6324
6325     if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
6326   }
6327
6328   // Motion compensated prediction
6329   best_mv.as_int = 0;
6330
6331   set_mv_limits(cm, x, mi_row, mi_col);
6332
6333   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6334     int_mv mv;
6335 #if CONFIG_NON_GREEDY_MV
6336     MotionField *motion_field;
6337 #endif
6338     if (ref_frame[rf_idx] == NULL) continue;
6339
6340 #if CONFIG_NON_GREEDY_MV
6341     (void)td;
6342     motion_field = vp9_motion_field_info_get_motion_field(
6343         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6344     mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6345 #else
6346     motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
6347                                   ref_frame[rf_idx]->y_buffer + mb_y_offset,
6348                                   xd->cur_buf->y_stride, bsize, &mv.as_mv);
6349 #endif
6350
6351 #if CONFIG_VP9_HIGHBITDEPTH
6352     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6353       vp9_highbd_build_inter_predictor(
6354           CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
6355           ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
6356           &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
6357           mi_row * MI_SIZE, xd->bd);
6358       vpx_highbd_subtract_block(
6359           bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
6360           xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
6361       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6362       inter_cost = vpx_highbd_satd(coeff, pix_num);
6363     } else {
6364       vp9_build_inter_predictor(
6365           ref_frame[rf_idx]->y_buffer + mb_y_offset,
6366           ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
6367           0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
6368       vpx_subtract_block(bh, bw, src_diff, bw,
6369                          xd->cur_buf->y_buffer + mb_y_offset,
6370                          xd->cur_buf->y_stride, &predictor[0], bw);
6371       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6372       inter_cost = vpx_satd(coeff, pix_num);
6373     }
6374 #else
6375     vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
6376                               ref_frame[rf_idx]->y_stride, &predictor[0], bw,
6377                               &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
6378                               mi_col * MI_SIZE, mi_row * MI_SIZE);
6379     vpx_subtract_block(bh, bw, src_diff, bw,
6380                        xd->cur_buf->y_buffer + mb_y_offset,
6381                        xd->cur_buf->y_stride, &predictor[0], bw);
6382     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6383     inter_cost = vpx_satd(coeff, pix_num);
6384 #endif
6385
6386     if (inter_cost < best_inter_cost) {
6387       best_rf_idx = rf_idx;
6388       best_inter_cost = inter_cost;
6389       best_mv.as_int = mv.as_int;
6390       get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
6391                          sse);
6392     }
6393   }
6394   best_intra_cost = VPXMAX(best_intra_cost, 1);
6395   best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
6396   tpl_stats->inter_cost = VPXMAX(
6397       1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6398   tpl_stats->intra_cost = VPXMAX(
6399       1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6400   tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
6401   tpl_stats->mv.as_int = best_mv.as_int;
6402 }
6403
6404 #if CONFIG_NON_GREEDY_MV
6405 static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
6406                                   int frame_idx, int rf_idx, int mi_row,
6407                                   int mi_col, struct buf_2d *src,
6408                                   struct buf_2d *pre) {
6409   const int mb_y_offset =
6410       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6411   YV12_BUFFER_CONFIG *ref_frame = NULL;
6412   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6413   if (ref_frame_idx != -1) {
6414     ref_frame = gf_picture[ref_frame_idx].frame;
6415     src->buf = xd->cur_buf->y_buffer + mb_y_offset;
6416     src->stride = xd->cur_buf->y_stride;
6417     pre->buf = ref_frame->y_buffer + mb_y_offset;
6418     pre->stride = ref_frame->y_stride;
6419     assert(src->stride == pre->stride);
6420     return 1;
6421   } else {
6422     printf("invalid ref_frame_idx");
6423     assert(ref_frame_idx != -1);
6424     return 0;
6425   }
6426 }
6427
6428 #define kMvPreCheckLines 5
6429 #define kMvPreCheckSize 15
6430
6431 #define MV_REF_POS_NUM 3
6432 POSITION mv_ref_pos[MV_REF_POS_NUM] = {
6433   { -1, 0 },
6434   { 0, -1 },
6435   { -1, -1 },
6436 };
6437
6438 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
6439                              int mi_col) {
6440   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
6441 }
6442
6443 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
6444                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
6445   int i;
6446   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6447   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6448   int_mv nearest_mv, near_mv, invalid_mv;
6449   nearest_mv.as_int = INVALID_MV;
6450   near_mv.as_int = INVALID_MV;
6451   invalid_mv.as_int = INVALID_MV;
6452   for (i = 0; i < MV_REF_POS_NUM; ++i) {
6453     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
6454     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
6455     assert(mv_ref_pos[i].row <= 0);
6456     assert(mv_ref_pos[i].col <= 0);
6457     if (nb_row >= 0 && nb_col >= 0) {
6458       if (nearest_mv.as_int == INVALID_MV) {
6459         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6460       } else {
6461         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6462         if (mv.as_int == nearest_mv.as_int) {
6463           continue;
6464         } else {
6465           near_mv = mv;
6466           break;
6467         }
6468       }
6469     }
6470   }
6471   if (nearest_mv.as_int == INVALID_MV) {
6472     nearest_mv.as_mv.row = 0;
6473     nearest_mv.as_mv.col = 0;
6474   }
6475   if (near_mv.as_int == INVALID_MV) {
6476     near_mv.as_mv.row = 0;
6477     near_mv.as_mv.col = 0;
6478   }
6479   if (mv_mode == NEAREST_MV_MODE) {
6480     return nearest_mv;
6481   }
6482   if (mv_mode == NEAR_MV_MODE) {
6483     return near_mv;
6484   }
6485   assert(0);
6486   return invalid_mv;
6487 }
6488
6489 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
6490                                   MotionField *motion_field,
6491                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
6492                                   int mi_row, int mi_col) {
6493   int_mv mv;
6494   switch (mv_mode) {
6495     case ZERO_MV_MODE:
6496       mv.as_mv.row = 0;
6497       mv.as_mv.col = 0;
6498       break;
6499     case NEW_MV_MODE:
6500       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6501       break;
6502     case NEAREST_MV_MODE:
6503       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6504       break;
6505     case NEAR_MV_MODE:
6506       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6507       break;
6508     default:
6509       mv.as_int = INVALID_MV;
6510       assert(0);
6511       break;
6512   }
6513   return mv;
6514 }
6515
6516 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
6517                           GF_PICTURE *gf_picture, MotionField *motion_field,
6518                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6519                           BLOCK_SIZE bsize, int mi_row, int mi_col,
6520                           int_mv *mv) {
6521   uint32_t sse;
6522   struct buf_2d src;
6523   struct buf_2d pre;
6524   MV full_mv;
6525   *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
6526                             mi_row, mi_col);
6527   full_mv = get_full_mv(&mv->as_mv);
6528   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
6529                              &src, &pre)) {
6530     // TODO(angiebird): Consider subpixel when computing the sse.
6531     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
6532                           pre.stride, &sse);
6533     return (double)(sse << VP9_DIST_SCALE_LOG2);
6534   } else {
6535     assert(0);
6536     return 0;
6537   }
6538 }
6539
6540 static int get_mv_mode_cost(int mv_mode) {
6541   // TODO(angiebird): The probabilities are roughly inferred from
6542   // default_inter_mode_probs. Check if there is a better way to set the
6543   // probabilities.
6544   const int zero_mv_prob = 16;
6545   const int new_mv_prob = 24 * 1;
6546   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
6547   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
6548   switch (mv_mode) {
6549     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
6550     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
6551     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6552     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6553     default: assert(0); return -1;
6554   }
6555 }
6556
6557 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
6558   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
6559                         log2(1 + abs(new_mv->col - ref_mv->col));
6560   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
6561   return mv_diff_cost;
6562 }
6563 static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
6564                           TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
6565                           int mi_col) {
6566   double mv_cost = get_mv_mode_cost(mv_mode);
6567   if (mv_mode == NEW_MV_MODE) {
6568     MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
6569                                     bsize, mi_row, mi_col)
6570                     .as_mv;
6571     MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
6572                                         tpl_frame, bsize, mi_row, mi_col)
6573                         .as_mv;
6574     MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
6575                                      bsize, mi_row, mi_col)
6576                      .as_mv;
6577     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
6578     double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
6579     mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
6580   }
6581   return mv_cost;
6582 }
6583
6584 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
6585                            GF_PICTURE *gf_picture, MotionField *motion_field,
6586                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6587                            BLOCK_SIZE bsize, int mi_row, int mi_col,
6588                            int_mv *mv) {
6589   MACROBLOCKD *xd = &x->e_mbd;
6590   double mv_dist =
6591       get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
6592                   tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
6593   double mv_cost =
6594       get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
6595   double mult = 180;
6596
6597   return mv_cost + mult * log2f(1 + mv_dist);
6598 }
6599
6600 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6601                                  GF_PICTURE *gf_picture,
6602                                  MotionField *motion_field, int frame_idx,
6603                                  TplDepFrame *tpl_frame, int rf_idx,
6604                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
6605                                  double *rd, int_mv *mv) {
6606   int best_mv_mode = ZERO_MV_MODE;
6607   int update = 0;
6608   int mv_mode;
6609   *rd = 0;
6610   for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
6611     double this_rd;
6612     int_mv this_mv;
6613     if (mv_mode == NEW_MV_MODE) {
6614       continue;
6615     }
6616     this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
6617                            tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
6618     if (update == 0) {
6619       *rd = this_rd;
6620       *mv = this_mv;
6621       best_mv_mode = mv_mode;
6622       update = 1;
6623     } else {
6624       if (this_rd < *rd) {
6625         *rd = this_rd;
6626         *mv = this_mv;
6627         best_mv_mode = mv_mode;
6628       }
6629     }
6630   }
6631   return best_mv_mode;
6632 }
6633
6634 static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6635                             GF_PICTURE *gf_picture, MotionField *motion_field,
6636                             int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6637                             BLOCK_SIZE bsize, int mi_row, int mi_col) {
6638   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6639   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6640   int tmp_mv_mode_arr[kMvPreCheckSize];
6641   int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
6642   double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
6643   int_mv *select_mv_arr = cpi->select_mv_arr;
6644   int_mv tmp_select_mv_arr[kMvPreCheckSize];
6645   int stride = tpl_frame->stride;
6646   double new_mv_rd = 0;
6647   double no_new_mv_rd = 0;
6648   double this_new_mv_rd = 0;
6649   double this_no_new_mv_rd = 0;
6650   int idx;
6651   int tmp_idx;
6652   assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);
6653
6654   // no new mv
6655   // diagonal scan order
6656   tmp_idx = 0;
6657   for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6658     int r;
6659     for (r = 0; r <= idx; ++r) {
6660       int c = idx - r;
6661       int nb_row = mi_row + r * mi_height;
6662       int nb_col = mi_col + c * mi_width;
6663       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6664         double this_rd;
6665         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6666         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6667             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6668             bsize, nb_row, nb_col, &this_rd, mv);
6669         if (r == 0 && c == 0) {
6670           this_no_new_mv_rd = this_rd;
6671         }
6672         no_new_mv_rd += this_rd;
6673         tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
6674         tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
6675         ++tmp_idx;
6676       }
6677     }
6678   }
6679
6680   // new mv
6681   mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
6682   this_new_mv_rd = eval_mv_mode(
6683       NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6684       rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
6685   new_mv_rd = this_new_mv_rd;
6686   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
6687   // beforehand.
6688   for (idx = 1; idx < kMvPreCheckLines; ++idx) {
6689     int r;
6690     for (r = 0; r <= idx; ++r) {
6691       int c = idx - r;
6692       int nb_row = mi_row + r * mi_height;
6693       int nb_col = mi_col + c * mi_width;
6694       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6695         double this_rd;
6696         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6697         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6698             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6699             bsize, nb_row, nb_col, &this_rd, mv);
6700         new_mv_rd += this_rd;
6701       }
6702     }
6703   }
6704
6705   // update best_mv_mode
6706   tmp_idx = 0;
6707   if (no_new_mv_rd < new_mv_rd) {
6708     for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6709       int r;
6710       for (r = 0; r <= idx; ++r) {
6711         int c = idx - r;
6712         int nb_row = mi_row + r * mi_height;
6713         int nb_col = mi_col + c * mi_width;
6714         if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6715           mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
6716           select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
6717           ++tmp_idx;
6718         }
6719       }
6720     }
6721     rd_diff_arr[mi_row * stride + mi_col] = 0;
6722   } else {
6723     rd_diff_arr[mi_row * stride + mi_col] =
6724         (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
6725   }
6726 }
6727
6728 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
6729                                 GF_PICTURE *gf_picture,
6730                                 MotionField *motion_field, int frame_idx,
6731                                 TplDepFrame *tpl_frame, int rf_idx,
6732                                 BLOCK_SIZE bsize) {
6733   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6734   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6735   const int unit_rows = tpl_frame->mi_rows / mi_height;
6736   const int unit_cols = tpl_frame->mi_cols / mi_width;
6737   const int max_diagonal_lines = unit_rows + unit_cols - 1;
6738   int idx;
6739   for (idx = 0; idx < max_diagonal_lines; ++idx) {
6740     int r;
6741     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
6742          ++r) {
6743       int c = idx - r;
6744       int mi_row = r * mi_height;
6745       int mi_col = c * mi_width;
6746       assert(c >= 0 && c < unit_cols);
6747       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
6748       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
6749       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6750                       rf_idx, bsize, mi_row, mi_col);
6751     }
6752   }
6753 }
6754
6755 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
6756                              MotionField *motion_field, int frame_idx,
6757                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
6758                              int mi_row, int mi_col) {
6759   VP9_COMMON *cm = &cpi->common;
6760   MACROBLOCK *x = &td->mb;
6761   MACROBLOCKD *xd = &x->e_mbd;
6762   const int mb_y_offset =
6763       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6764   assert(ref_frame != NULL);
6765   set_mv_limits(cm, x, mi_row, mi_col);
6766   {
6767     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6768     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
6769     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
6770     const int stride = xd->cur_buf->y_stride;
6771     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
6772                              ref_frame_buf, stride, bsize, mi_row, mi_col,
6773                              &mv.as_mv);
6774     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
6775                             bsize, &mv.as_mv);
6776     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
6777   }
6778 }
6779
6780 static void build_motion_field(
6781     VP9_COMP *cpi, int frame_idx,
6782     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
6783   VP9_COMMON *cm = &cpi->common;
6784   ThreadData *td = &cpi->td;
6785   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6786   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6787   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6788   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
6789   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
6790   int mi_row, mi_col;
6791   int rf_idx;
6792
6793   tpl_frame->lambda = (pw * ph) >> 2;
6794   assert(pw * ph == tpl_frame->lambda << 2);
6795
6796   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6797     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6798         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6799     if (ref_frame[rf_idx] == NULL) {
6800       continue;
6801     }
6802     vp9_motion_field_reset_mvs(motion_field);
6803     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6804       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6805         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
6806                          bsize, mi_row, mi_col);
6807       }
6808     }
6809   }
6810 }
6811 #endif  // CONFIG_NON_GREEDY_MV
6812
6813 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
6814                               int frame_idx, BLOCK_SIZE bsize) {
6815   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6816   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
6817   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
6818
6819   VP9_COMMON *cm = &cpi->common;
6820   struct scale_factors sf;
6821   int rdmult, idx;
6822   ThreadData *td = &cpi->td;
6823   MACROBLOCK *x = &td->mb;
6824   MACROBLOCKD *xd = &x->e_mbd;
6825   int mi_row, mi_col;
6826
6827 #if CONFIG_VP9_HIGHBITDEPTH
6828   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
6829   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
6830   uint8_t *predictor;
6831 #else
6832   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
6833 #endif
6834   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
6835   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
6836   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
6837   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
6838
6839   const TX_SIZE tx_size = max_txsize_lookup[bsize];
6840   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6841   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6842   int64_t recon_error, sse;
6843 #if CONFIG_NON_GREEDY_MV
6844   int square_block_idx;
6845   int rf_idx;
6846 #endif
6847
6848   // Setup scaling factor
6849 #if CONFIG_VP9_HIGHBITDEPTH
6850   vp9_setup_scale_factors_for_frame(
6851       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6852       this_frame->y_crop_width, this_frame->y_crop_height,
6853       cpi->common.use_highbitdepth);
6854
6855   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6856     predictor = CONVERT_TO_BYTEPTR(predictor16);
6857   else
6858     predictor = predictor8;
6859 #else
6860   vp9_setup_scale_factors_for_frame(
6861       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6862       this_frame->y_crop_width, this_frame->y_crop_height);
6863 #endif  // CONFIG_VP9_HIGHBITDEPTH
6864
6865   // Prepare reference frame pointers. If any reference frame slot is
6866   // unavailable, the pointer will be set to Null.
6867   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
6868     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
6869     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
6870   }
6871
6872   xd->mi = cm->mi_grid_visible;
6873   xd->mi[0] = cm->mi;
6874   xd->cur_buf = this_frame;
6875
6876   // Get rd multiplier set up.
6877   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
6878   set_error_per_bit(&cpi->td.mb, rdmult);
6879   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
6880
6881   tpl_frame->is_valid = 1;
6882
6883   cm->base_qindex = tpl_frame->base_qindex;
6884   vp9_frame_init_quantizer(cpi);
6885
6886 #if CONFIG_NON_GREEDY_MV
6887   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
6888        ++square_block_idx) {
6889     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
6890     build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
6891   }
6892   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6893     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6894     if (ref_frame_idx != -1) {
6895       MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6896           &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6897       predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
6898                           tpl_frame, rf_idx, bsize);
6899     }
6900   }
6901 #endif
6902
6903   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6904     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6905       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
6906                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
6907                       tx_size, ref_frame, predictor, &recon_error, &sse);
6908       // Motion flow dependency dispenser.
6909       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
6910                       tpl_frame->stride);
6911
6912       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
6913                        bsize);
6914     }
6915   }
6916 }
6917
6918 #if CONFIG_NON_GREEDY_MV
6919 #define DUMP_TPL_STATS 0
6920 #if DUMP_TPL_STATS
6921 static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
6922   int i, j;
6923   printf("%d %d\n", h, w);
6924   for (i = 0; i < h; ++i) {
6925     for (j = 0; j < w; ++j) {
6926       printf("%d ", buf[(row + i) * stride + col + j]);
6927     }
6928   }
6929   printf("\n");
6930 }
6931
6932 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
6933   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
6934            frame_buf->y_width);
6935   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
6936            frame_buf->uv_height, frame_buf->uv_width);
6937   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
6938            frame_buf->uv_height, frame_buf->uv_width);
6939 }
6940
6941 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
6942                            const GF_GROUP *gf_group,
6943                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
6944   int frame_idx;
6945   const VP9_COMMON *cm = &cpi->common;
6946   int rf_idx;
6947   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
6948     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6949       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6950       int mi_row, mi_col;
6951       int ref_frame_idx;
6952       const int mi_height = num_8x8_blocks_high_lookup[bsize];
6953       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6954       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6955       if (ref_frame_idx != -1) {
6956         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
6957         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
6958         const int ref_gf_frame_offset =
6959             gf_group->frame_gop_index[ref_frame_idx];
6960         printf("=\n");
6961         printf(
6962             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
6963             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
6964             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
6965             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
6966         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
6967           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
6968             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
6969               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
6970                                                        frame_idx, rf_idx, bsize,
6971                                                        mi_row, mi_col);
6972               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
6973                      mv.as_mv.col);
6974             }
6975           }
6976         }
6977         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
6978           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
6979             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
6980               const TplDepStats *tpl_ptr =
6981                   &tpl_frame
6982                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6983               printf("%f ", tpl_ptr->feature_score);
6984             }
6985           }
6986         }
6987         printf("\n");
6988
6989         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6990           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6991             const int mv_mode =
6992                 tpl_frame
6993                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
6994             printf("%d ", mv_mode);
6995           }
6996         }
6997         printf("\n");
6998
6999         dump_frame_buf(gf_picture[frame_idx].frame);
7000         dump_frame_buf(ref_frame_buf);
7001       }
7002     }
7003   }
7004 }
7005 #endif  // DUMP_TPL_STATS
7006 #endif  // CONFIG_NON_GREEDY_MV
7007
7008 static void init_tpl_buffer(VP9_COMP *cpi) {
7009   VP9_COMMON *cm = &cpi->common;
7010   int frame;
7011
7012   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7013   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7014 #if CONFIG_NON_GREEDY_MV
7015   int rf_idx;
7016
7017   vpx_free(cpi->select_mv_arr);
7018   CHECK_MEM_ERROR(
7019       cm, cpi->select_mv_arr,
7020       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
7021 #endif
7022
7023   // TODO(jingning): Reduce the actual memory use for tpl model build up.
7024   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7025     if (cpi->tpl_stats[frame].width >= mi_cols &&
7026         cpi->tpl_stats[frame].height >= mi_rows &&
7027         cpi->tpl_stats[frame].tpl_stats_ptr)
7028       continue;
7029
7030 #if CONFIG_NON_GREEDY_MV
7031     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7032       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7033       CHECK_MEM_ERROR(
7034           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
7035           vpx_calloc(mi_rows * mi_cols * 4,
7036                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
7037       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7038       CHECK_MEM_ERROR(
7039           cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
7040           vpx_calloc(mi_rows * mi_cols * 4,
7041                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
7042     }
7043 #endif
7044     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7045     CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
7046                     vpx_calloc(mi_rows * mi_cols,
7047                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
7048     cpi->tpl_stats[frame].is_valid = 0;
7049     cpi->tpl_stats[frame].width = mi_cols;
7050     cpi->tpl_stats[frame].height = mi_rows;
7051     cpi->tpl_stats[frame].stride = mi_cols;
7052     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
7053     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
7054   }
7055
7056   for (frame = 0; frame < REF_FRAMES; ++frame) {
7057     cpi->enc_frame_buf[frame].mem_valid = 0;
7058     cpi->enc_frame_buf[frame].released = 1;
7059   }
7060 }
7061
7062 static void free_tpl_buffer(VP9_COMP *cpi) {
7063   int frame;
7064 #if CONFIG_NON_GREEDY_MV
7065   vp9_free_motion_field_info(&cpi->motion_field_info);
7066   vpx_free(cpi->select_mv_arr);
7067 #endif
7068   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7069 #if CONFIG_NON_GREEDY_MV
7070     int rf_idx;
7071     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7072       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7073       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7074     }
7075 #endif
7076     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7077     cpi->tpl_stats[frame].is_valid = 0;
7078   }
7079 }
7080
7081 static void setup_tpl_stats(VP9_COMP *cpi) {
7082   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
7083   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7084   int tpl_group_frames = 0;
7085   int frame_idx;
7086   cpi->tpl_bsize = BLOCK_32X32;
7087
7088   init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
7089
7090   init_tpl_stats(cpi);
7091
7092   // Backward propagation from tpl_group_frames to 1.
7093   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
7094     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
7095     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
7096   }
7097 #if CONFIG_NON_GREEDY_MV
7098   cpi->tpl_ready = 1;
7099 #if DUMP_TPL_STATS
7100   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
7101 #endif  // DUMP_TPL_STATS
7102 #endif  // CONFIG_NON_GREEDY_MV
7103 }
7104
7105 #if !CONFIG_REALTIME_ONLY
7106 #if CONFIG_RATE_CTRL
7107 static void copy_frame_counts(const FRAME_COUNTS *input_counts,
7108                               FRAME_COUNTS *output_counts) {
7109   int i, j, k, l, m, n;
7110   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
7111     for (j = 0; j < INTRA_MODES; ++j) {
7112       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
7113     }
7114   }
7115   for (i = 0; i < INTRA_MODES; ++i) {
7116     for (j = 0; j < INTRA_MODES; ++j) {
7117       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
7118     }
7119   }
7120   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
7121     for (j = 0; j < PARTITION_TYPES; ++j) {
7122       output_counts->partition[i][j] = input_counts->partition[i][j];
7123     }
7124   }
7125   for (i = 0; i < TX_SIZES; ++i) {
7126     for (j = 0; j < PLANE_TYPES; ++j) {
7127       for (k = 0; k < REF_TYPES; ++k) {
7128         for (l = 0; l < COEF_BANDS; ++l) {
7129           for (m = 0; m < COEFF_CONTEXTS; ++m) {
7130             output_counts->eob_branch[i][j][k][l][m] =
7131                 input_counts->eob_branch[i][j][k][l][m];
7132             for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) {
7133               output_counts->coef[i][j][k][l][m][n] =
7134                   input_counts->coef[i][j][k][l][m][n];
7135             }
7136           }
7137         }
7138       }
7139     }
7140   }
7141   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
7142     for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
7143       output_counts->switchable_interp[i][j] =
7144           input_counts->switchable_interp[i][j];
7145     }
7146   }
7147   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
7148     for (j = 0; j < INTER_MODES; ++j) {
7149       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
7150     }
7151   }
7152   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
7153     for (j = 0; j < 2; ++j) {
7154       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
7155     }
7156   }
7157   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
7158     for (j = 0; j < 2; ++j) {
7159       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
7160     }
7161   }
7162   for (i = 0; i < REF_CONTEXTS; ++i) {
7163     for (j = 0; j < 2; ++j) {
7164       for (k = 0; k < 2; ++k) {
7165         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
7166       }
7167     }
7168   }
7169   for (i = 0; i < REF_CONTEXTS; ++i) {
7170     for (j = 0; j < 2; ++j) {
7171       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
7172     }
7173   }
7174   for (i = 0; i < SKIP_CONTEXTS; ++i) {
7175     for (j = 0; j < 2; ++j) {
7176       output_counts->skip[i][j] = input_counts->skip[i][j];
7177     }
7178   }
7179   for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
7180     for (j = 0; j < TX_SIZES; j++) {
7181       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
7182     }
7183     for (j = 0; j < TX_SIZES - 1; j++) {
7184       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
7185     }
7186     for (j = 0; j < TX_SIZES - 2; j++) {
7187       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
7188     }
7189   }
7190   for (i = 0; i < TX_SIZES; i++) {
7191     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
7192   }
7193   for (i = 0; i < MV_JOINTS; i++) {
7194     output_counts->mv.joints[i] = input_counts->mv.joints[i];
7195   }
7196   for (k = 0; k < 2; k++) {
7197     nmv_component_counts *const comps = &output_counts->mv.comps[k];
7198     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
7199     for (i = 0; i < 2; i++) {
7200       comps->sign[i] = comps_t->sign[i];
7201       comps->class0_hp[i] = comps_t->class0_hp[i];
7202       comps->hp[i] = comps_t->hp[i];
7203     }
7204     for (i = 0; i < MV_CLASSES; i++) {
7205       comps->classes[i] = comps_t->classes[i];
7206     }
7207     for (i = 0; i < CLASS0_SIZE; i++) {
7208       comps->class0[i] = comps_t->class0[i];
7209       for (j = 0; j < MV_FP_SIZE; j++) {
7210         comps->class0_fp[i][j] = comps_t->class0_fp[i][j];
7211       }
7212     }
7213     for (i = 0; i < MV_OFFSET_BITS; i++) {
7214       for (j = 0; j < 2; j++) {
7215         comps->bits[i][j] = comps_t->bits[i][j];
7216       }
7217     }
7218     for (i = 0; i < MV_FP_SIZE; i++) {
7219       comps->fp[i] = comps_t->fp[i];
7220     }
7221   }
7222 }
7223
7224 static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
7225                                         IMAGE_BUFFER *image_buffer) {
7226   const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,
7227                                    yv12_buffer->v_buffer };
7228   const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,
7229                                  yv12_buffer->uv_stride };
7230   const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,
7231                         yv12_buffer->uv_crop_width };
7232   const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,
7233                         yv12_buffer->uv_crop_height };
7234   int plane;
7235   for (plane = 0; plane < 3; ++plane) {
7236     const int src_stride = src_stride_ls[plane];
7237     const int w = w_ls[plane];
7238     const int h = h_ls[plane];
7239     const uint8_t *src_buf = src_buf_ls[plane];
7240     uint8_t *dst_buf = image_buffer->plane_buffer[plane];
7241     int r;
7242     assert(image_buffer->plane_width[plane] == w);
7243     assert(image_buffer->plane_height[plane] == h);
7244     for (r = 0; r < h; ++r) {
7245       memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);
7246       src_buf += src_stride;
7247       dst_buf += w;
7248     }
7249   }
7250 }
7251 #endif  // CONFIG_RATE_CTRL
7252
7253 static void update_encode_frame_result(
7254     int show_idx, FRAME_UPDATE_TYPE update_type,
7255     const YV12_BUFFER_CONFIG *source_frame,
7256     const YV12_BUFFER_CONFIG *coded_frame, int quantize_index,
7257     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
7258 #if CONFIG_RATE_CTRL
7259     const PARTITION_INFO *partition_info,
7260     const MOTION_VECTOR_INFO *motion_vector_info,
7261 #endif  // CONFIG_RATE_CTRL
7262     ENCODE_FRAME_RESULT *encode_frame_result) {
7263 #if CONFIG_RATE_CTRL
7264   PSNR_STATS psnr;
7265 #if CONFIG_VP9_HIGHBITDEPTH
7266   vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
7267                        input_bit_depth);
7268 #else   // CONFIG_VP9_HIGHBITDEPTH
7269   (void)bit_depth;
7270   (void)input_bit_depth;
7271   vpx_calc_psnr(source_frame, coded_frame, &psnr);
7272 #endif  // CONFIG_VP9_HIGHBITDEPTH
7273   encode_frame_result->psnr = psnr.psnr[0];
7274   encode_frame_result->sse = psnr.sse[0];
7275   copy_frame_counts(counts, &encode_frame_result->frame_counts);
7276   encode_frame_result->partition_info = partition_info;
7277   encode_frame_result->motion_vector_info = motion_vector_info;
7278   if (encode_frame_result->coded_frame.allocated) {
7279     yv12_buffer_to_image_buffer(coded_frame, &encode_frame_result->coded_frame);
7280   }
7281 #else   // CONFIG_RATE_CTRL
7282   (void)bit_depth;
7283   (void)input_bit_depth;
7284   (void)source_frame;
7285   (void)coded_frame;
7286   (void)counts;
7287 #endif  // CONFIG_RATE_CTRL
7288   encode_frame_result->show_idx = show_idx;
7289   encode_frame_result->update_type = update_type;
7290   encode_frame_result->quantize_index = quantize_index;
7291 }
7292 #endif  // !CONFIG_REALTIME_ONLY
7293
7294 void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
7295   encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
7296 #if CONFIG_RATE_CTRL
7297   vp9_zero(encode_frame_result->coded_frame);
7298   encode_frame_result->coded_frame.allocated = 0;
7299 #endif  // CONFIG_RATE_CTRL
7300 }
7301
7302 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
7303                             size_t *size, uint8_t *dest, int64_t *time_stamp,
7304                             int64_t *time_end, int flush,
7305                             ENCODE_FRAME_RESULT *encode_frame_result) {
7306   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
7307   VP9_COMMON *const cm = &cpi->common;
7308   BufferPool *const pool = cm->buffer_pool;
7309   RATE_CONTROL *const rc = &cpi->rc;
7310   struct vpx_usec_timer cmptimer;
7311   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
7312   struct lookahead_entry *last_source = NULL;
7313   struct lookahead_entry *source = NULL;
7314   int arf_src_index;
7315   const int gf_group_index = cpi->twopass.gf_group.index;
7316   int i;
7317
7318   if (is_one_pass_cbr_svc(cpi)) {
7319     vp9_one_pass_cbr_svc_start_layer(cpi);
7320   }
7321
7322   vpx_usec_timer_start(&cmptimer);
7323
7324   vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
7325
7326   // Is multi-arf enabled.
7327   // Note that at the moment multi_arf is only configured for 2 pass VBR and
7328   // will not work properly with svc.
7329   // Enable the Jingning's new "multi_layer_arf" code if "enable_auto_arf"
7330   // is greater than or equal to 2.
7331   if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
7332     cpi->multi_layer_arf = 1;
7333   else
7334     cpi->multi_layer_arf = 0;
7335
7336   // Normal defaults
7337   cm->reset_frame_context = 0;
7338   cm->refresh_frame_context = 1;
7339   if (!is_one_pass_cbr_svc(cpi)) {
7340     cpi->refresh_last_frame = 1;
7341     cpi->refresh_golden_frame = 0;
7342     cpi->refresh_alt_ref_frame = 0;
7343   }
7344
7345   // Should we encode an arf frame.
7346   arf_src_index = get_arf_src_index(cpi);
7347
7348   if (arf_src_index) {
7349     for (i = 0; i <= arf_src_index; ++i) {
7350       struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
7351       // Avoid creating an alt-ref if there's a forced keyframe pending.
7352       if (e == NULL) {
7353         break;
7354       } else if (e->flags == VPX_EFLAG_FORCE_KF) {
7355         arf_src_index = 0;
7356         flush = 1;
7357         break;
7358       }
7359     }
7360   }
7361
7362   // Clear arf index stack before group of pictures processing starts.
7363   if (gf_group_index == 1) {
7364     stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
7365     cpi->twopass.gf_group.stack_size = 0;
7366   }
7367
7368   if (arf_src_index) {
7369     assert(arf_src_index <= rc->frames_to_key);
7370     if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
7371       cpi->alt_ref_source = source;
7372
7373 #if !CONFIG_REALTIME_ONLY
7374       if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
7375           (oxcf->arnr_strength > 0)) {
7376         int bitrate = cpi->rc.avg_frame_bandwidth / 40;
7377         int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;
7378
7379         int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1);
7380         not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME;
7381
7382         // Produce the filtered ARF frame.
7383         vp9_temporal_filter(cpi, arf_src_index);
7384         vpx_extend_frame_borders(&cpi->alt_ref_buffer);
7385
7386         // for small bitrates segmentation overhead usually
7387         // eats all bitrate gain from enabling delta quantizers
7388         if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
7389           vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);
7390
7391         force_src_buffer = &cpi->alt_ref_buffer;
7392       }
7393 #endif
7394       cm->show_frame = 0;
7395       cm->intra_only = 0;
7396       cpi->refresh_alt_ref_frame = 1;
7397       cpi->refresh_golden_frame = 0;
7398       cpi->refresh_last_frame = 0;
7399       rc->is_src_frame_alt_ref = 0;
7400       rc->source_alt_ref_pending = 0;
7401     } else {
7402       rc->source_alt_ref_pending = 0;
7403     }
7404   }
7405
7406   if (!source) {
7407     // Get last frame source.
7408     if (cm->current_video_frame > 0) {
7409       if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
7410         return -1;
7411     }
7412
7413     // Read in the source frame.
7414     if (cpi->use_svc || cpi->svc.set_intra_only_frame)
7415       source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
7416     else
7417       source = vp9_lookahead_pop(cpi->lookahead, flush);
7418
7419     if (source != NULL) {
7420       cm->show_frame = 1;
7421       cm->intra_only = 0;
7422       // If the flags indicate intra frame, but if the current picture is for
7423       // spatial layer above first_spatial_layer_to_encode, it should not be an
7424       // intra picture.
7425       if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
7426           cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
7427         source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
7428       }
7429
7430       // Check to see if the frame should be encoded as an arf overlay.
7431       check_src_altref(cpi, source);
7432     }
7433   }
7434
7435   if (source) {
7436     cpi->un_scaled_source = cpi->Source =
7437         force_src_buffer ? force_src_buffer : &source->img;
7438
7439 #ifdef ENABLE_KF_DENOISE
7440     // Copy of raw source for metrics calculation.
7441     if (is_psnr_calc_enabled(cpi))
7442       vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
7443 #endif
7444
7445     cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
7446
7447     *time_stamp = source->ts_start;
7448     *time_end = source->ts_end;
7449     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
7450   } else {
7451     *size = 0;
7452     return -1;
7453   }
7454
7455   if (source->ts_start < cpi->first_time_stamp_ever) {
7456     cpi->first_time_stamp_ever = source->ts_start;
7457     cpi->last_end_time_stamp_seen = source->ts_start;
7458   }
7459
7460   // Clear down mmx registers
7461   vpx_clear_system_state();
7462
7463   // adjust frame rates based on timestamps given
7464   if (cm->show_frame) {
7465     if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
7466         cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
7467       vp9_svc_adjust_frame_rate(cpi);
7468     else
7469       adjust_frame_rate(cpi, source);
7470   }
7471
7472   if (is_one_pass_cbr_svc(cpi)) {
7473     vp9_update_temporal_layer_framerate(cpi);
7474     vp9_restore_layer_context(cpi);
7475   }
7476
7477   // Find a free buffer for the new frame, releasing the reference previously
7478   // held.
7479   if (cm->new_fb_idx != INVALID_IDX) {
7480     --pool->frame_bufs[cm->new_fb_idx].ref_count;
7481   }
7482   cm->new_fb_idx = get_free_fb(cm);
7483
7484   if (cm->new_fb_idx == INVALID_IDX) return -1;
7485
7486   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
7487
7488   // Start with a 0 size frame.
7489   *size = 0;
7490
7491   cpi->frame_flags = *frame_flags;
7492
7493 #if !CONFIG_REALTIME_ONLY
7494   if ((oxcf->pass == 2) && !cpi->use_svc) {
7495     vp9_rc_get_second_pass_params(cpi);
7496   } else if (oxcf->pass == 1) {
7497     set_frame_size(cpi);
7498   }
7499 #endif  // !CONFIG_REALTIME_ONLY
7500
7501   if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
7502       cpi->level_constraint.fail_flag == 0)
7503     level_rc_framerate(cpi, arf_src_index);
7504
7505   if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
7506     for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
7507   }
7508
7509   if (cpi->kmeans_data_arr_alloc == 0) {
7510     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7511     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7512 #if CONFIG_MULTITHREAD
7513     pthread_mutex_init(&cpi->kmeans_mutex, NULL);
7514 #endif
7515     CHECK_MEM_ERROR(
7516         cm, cpi->kmeans_data_arr,
7517         vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
7518     cpi->kmeans_data_stride = mi_cols;
7519     cpi->kmeans_data_arr_alloc = 1;
7520   }
7521
7522 #if CONFIG_NON_GREEDY_MV
7523   {
7524     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7525     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7526     Status status = vp9_alloc_motion_field_info(
7527         &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
7528     if (status == STATUS_FAILED) {
7529       vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
7530                          "vp9_alloc_motion_field_info failed");
7531     }
7532   }
7533 #endif  // CONFIG_NON_GREEDY_MV
7534
7535   if (gf_group_index == 1 &&
7536       cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
7537       cpi->sf.enable_tpl_model) {
7538     init_tpl_buffer(cpi);
7539     vp9_estimate_qp_gop(cpi);
7540     setup_tpl_stats(cpi);
7541   }
7542
7543 #if CONFIG_BITSTREAM_DEBUG
7544   assert(cpi->oxcf.max_threads == 0 &&
7545          "bitstream debug tool does not support multithreading");
7546   bitstream_queue_record_write();
7547 #endif
7548 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
7549   bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
7550 #endif
7551
7552   cpi->td.mb.fp_src_pred = 0;
7553 #if CONFIG_REALTIME_ONLY
7554   (void)encode_frame_result;
7555   if (cpi->use_svc) {
7556     SvcEncode(cpi, size, dest, frame_flags);
7557   } else {
7558     // One pass encode
7559     Pass0Encode(cpi, size, dest, frame_flags);
7560   }
7561 #else  // !CONFIG_REALTIME_ONLY
7562   if (oxcf->pass == 1 && !cpi->use_svc) {
7563     const int lossless = is_lossless_requested(oxcf);
7564 #if CONFIG_VP9_HIGHBITDEPTH
7565     if (cpi->oxcf.use_highbitdepth)
7566       cpi->td.mb.fwd_txfm4x4 =
7567           lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
7568     else
7569       cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7570     cpi->td.mb.highbd_inv_txfm_add =
7571         lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
7572 #else
7573     cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7574 #endif  // CONFIG_VP9_HIGHBITDEPTH
7575     cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
7576     vp9_first_pass(cpi, source);
7577   } else if (oxcf->pass == 2 && !cpi->use_svc) {
7578     Pass2Encode(cpi, size, dest, frame_flags);
7579     // update_encode_frame_result() depends on twopass.gf_group.index and
7580     // cm->new_fb_idx and cpi->Source are updated for current properly and have
7581     // not been updated for the next frame yet.
7582     // The update locations are as follows.
7583     // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
7584     // for the first frame in the gf_group and is updated for the next frame at
7585     // vp9_twopass_postencode_update().
7586     // 2) cpi->Source is updated at the beginning of this function, i.e.
7587     // vp9_get_compressed_data()
7588     // 3) cm->new_fb_idx is updated at the beginning of this function by
7589     // get_free_fb(cm)
7590     // TODO(angiebird): Improve the codebase to make the update of frame
7591     // dependent variables more robust.
7592     update_encode_frame_result(
7593         source->show_idx,
7594         cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
7595         cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi),
7596         cpi->oxcf.input_bit_depth, cm->bit_depth, cpi->td.counts,
7597 #if CONFIG_RATE_CTRL
7598         cpi->partition_info, cpi->motion_vector_info,
7599 #endif  // CONFIG_RATE_CTRL
7600         encode_frame_result);
7601     vp9_twopass_postencode_update(cpi);
7602   } else if (cpi->use_svc) {
7603     SvcEncode(cpi, size, dest, frame_flags);
7604   } else {
7605     // One pass encode
7606     Pass0Encode(cpi, size, dest, frame_flags);
7607   }
7608 #endif  // CONFIG_REALTIME_ONLY
7609
7610   if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
7611
7612   if (cm->refresh_frame_context)
7613     cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
7614
7615   // No frame encoded, or frame was dropped, release scaled references.
7616   if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
7617     release_scaled_references(cpi);
7618   }
7619
7620   if (*size > 0) {
7621     cpi->droppable = !frame_is_reference(cpi);
7622   }
7623
7624   // Save layer specific state.
7625   if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 ||
7626                                     cpi->svc.number_spatial_layers > 1) &&
7627                                    oxcf->pass == 2)) {
7628     vp9_save_layer_context(cpi);
7629   }
7630
7631   vpx_usec_timer_mark(&cmptimer);
7632   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
7633
7634   if (cpi->keep_level_stats && oxcf->pass != 1)
7635     update_level_info(cpi, size, arf_src_index);
7636
7637 #if CONFIG_INTERNAL_STATS
7638
7639   if (oxcf->pass != 1) {
7640     double samples = 0.0;
7641     cpi->bytes += (int)(*size);
7642
7643     if (cm->show_frame) {
7644       uint32_t bit_depth = 8;
7645       uint32_t in_bit_depth = 8;
7646       cpi->count++;
7647 #if CONFIG_VP9_HIGHBITDEPTH
7648       if (cm->use_highbitdepth) {
7649         in_bit_depth = cpi->oxcf.input_bit_depth;
7650         bit_depth = cm->bit_depth;
7651       }
7652 #endif
7653
7654       if (cpi->b_calculate_psnr) {
7655         YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
7656         YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
7657         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
7658         PSNR_STATS psnr;
7659 #if CONFIG_VP9_HIGHBITDEPTH
7660         vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
7661                              in_bit_depth);
7662 #else
7663         vpx_calc_psnr(orig, recon, &psnr);
7664 #endif  // CONFIG_VP9_HIGHBITDEPTH
7665
7666         adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
7667                           psnr.psnr[0], &cpi->psnr);
7668         cpi->total_sq_error += psnr.sse[0];
7669         cpi->total_samples += psnr.samples[0];
7670         samples = psnr.samples[0];
7671
7672         {
7673           PSNR_STATS psnr2;
7674           double frame_ssim2 = 0, weight = 0;
7675 #if CONFIG_VP9_POSTPROC
7676           if (vpx_alloc_frame_buffer(
7677                   pp, recon->y_crop_width, recon->y_crop_height,
7678                   cm->subsampling_x, cm->subsampling_y,
7679 #if CONFIG_VP9_HIGHBITDEPTH
7680                   cm->use_highbitdepth,
7681 #endif
7682                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) {
7683             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
7684                                "Failed to allocate post processing buffer");
7685           }
7686           {
7687             vp9_ppflags_t ppflags;
7688             ppflags.post_proc_flag = VP9D_DEBLOCK;
7689             ppflags.deblocking_level = 0;  // not used in vp9_post_proc_frame()
7690             ppflags.noise_level = 0;       // not used in vp9_post_proc_frame()
7691             vp9_post_proc_frame(cm, pp, &ppflags,
7692                                 cpi->un_scaled_source->y_width);
7693           }
7694 #endif
7695           vpx_clear_system_state();
7696
7697 #if CONFIG_VP9_HIGHBITDEPTH
7698           vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
7699                                cpi->oxcf.input_bit_depth);
7700 #else
7701           vpx_calc_psnr(orig, pp, &psnr2);
7702 #endif  // CONFIG_VP9_HIGHBITDEPTH
7703
7704           cpi->totalp_sq_error += psnr2.sse[0];
7705           cpi->totalp_samples += psnr2.samples[0];
7706           adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
7707                             psnr2.psnr[0], &cpi->psnrp);
7708
7709 #if CONFIG_VP9_HIGHBITDEPTH
7710           if (cm->use_highbitdepth) {
7711             frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth,
7712                                                in_bit_depth);
7713           } else {
7714             frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7715           }
7716 #else
7717           frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7718 #endif  // CONFIG_VP9_HIGHBITDEPTH
7719
7720           cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
7721           cpi->summed_quality += frame_ssim2 * weight;
7722           cpi->summed_weights += weight;
7723
7724 #if CONFIG_VP9_HIGHBITDEPTH
7725           if (cm->use_highbitdepth) {
7726             frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth,
7727                                                in_bit_depth);
7728           } else {
7729             frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7730           }
7731 #else
7732           frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7733 #endif  // CONFIG_VP9_HIGHBITDEPTH
7734
7735           cpi->summedp_quality += frame_ssim2 * weight;
7736           cpi->summedp_weights += weight;
7737 #if 0
7738           if (cm->show_frame) {
7739             FILE *f = fopen("q_used.stt", "a");
7740             fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
7741                     cpi->common.current_video_frame, psnr2.psnr[1],
7742                     psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
7743             fclose(f);
7744           }
7745 #endif
7746         }
7747       }
7748       if (cpi->b_calculate_blockiness) {
7749 #if CONFIG_VP9_HIGHBITDEPTH
7750         if (!cm->use_highbitdepth)
7751 #endif
7752         {
7753           double frame_blockiness = vp9_get_blockiness(
7754               cpi->Source->y_buffer, cpi->Source->y_stride,
7755               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
7756               cpi->Source->y_width, cpi->Source->y_height);
7757           cpi->worst_blockiness =
7758               VPXMAX(cpi->worst_blockiness, frame_blockiness);
7759           cpi->total_blockiness += frame_blockiness;
7760         }
7761       }
7762
7763       if (cpi->b_calculate_consistency) {
7764 #if CONFIG_VP9_HIGHBITDEPTH
7765         if (!cm->use_highbitdepth)
7766 #endif
7767         {
7768           double this_inconsistency = vpx_get_ssim_metrics(
7769               cpi->Source->y_buffer, cpi->Source->y_stride,
7770               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
7771               cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
7772               &cpi->metrics, 1);
7773
7774           const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
7775           double consistency =
7776               vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency);
7777           if (consistency > 0.0)
7778             cpi->worst_consistency =
7779                 VPXMIN(cpi->worst_consistency, consistency);
7780           cpi->total_inconsistency += this_inconsistency;
7781         }
7782       }
7783
7784       {
7785         double y, u, v, frame_all;
7786         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
7787                                       &v, bit_depth, in_bit_depth);
7788         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
7789       }
7790       {
7791         double y, u, v, frame_all;
7792         frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
7793                                 bit_depth, in_bit_depth);
7794         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
7795       }
7796     }
7797   }
7798
7799 #endif
7800
7801   if (is_one_pass_cbr_svc(cpi)) {
7802     if (cm->show_frame) {
7803       ++cpi->svc.spatial_layer_to_encode;
7804       if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
7805         cpi->svc.spatial_layer_to_encode = 0;
7806     }
7807   }
7808
7809   vpx_clear_system_state();
7810   return 0;
7811 }
7812
7813 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
7814                               vp9_ppflags_t *flags) {
7815   VP9_COMMON *cm = &cpi->common;
7816 #if !CONFIG_VP9_POSTPROC
7817   (void)flags;
7818 #endif
7819
7820   if (!cm->show_frame) {
7821     return -1;
7822   } else {
7823     int ret;
7824 #if CONFIG_VP9_POSTPROC
7825     ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
7826 #else
7827     if (cm->frame_to_show) {
7828       *dest = *cm->frame_to_show;
7829       dest->y_width = cm->width;
7830       dest->y_height = cm->height;
7831       dest->uv_width = cm->width >> cm->subsampling_x;
7832       dest->uv_height = cm->height >> cm->subsampling_y;
7833       ret = 0;
7834     } else {
7835       ret = -1;
7836     }
7837 #endif  // !CONFIG_VP9_POSTPROC
7838     vpx_clear_system_state();
7839     return ret;
7840   }
7841 }
7842
7843 int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
7844                           VPX_SCALING vert_mode) {
7845   VP9_COMMON *cm = &cpi->common;
7846   int hr = 0, hs = 0, vr = 0, vs = 0;
7847
7848   if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
7849
7850   Scale2Ratio(horiz_mode, &hr, &hs);
7851   Scale2Ratio(vert_mode, &vr, &vs);
7852
7853   // always go to the next whole number
7854   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
7855   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
7856   if (cm->current_video_frame) {
7857     assert(cm->width <= cpi->initial_width);
7858     assert(cm->height <= cpi->initial_height);
7859   }
7860
7861   update_frame_size(cpi);
7862
7863   return 0;
7864 }
7865
7866 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
7867                          unsigned int height) {
7868   VP9_COMMON *cm = &cpi->common;
7869 #if CONFIG_VP9_HIGHBITDEPTH
7870   update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
7871 #else
7872   update_initial_width(cpi, 0, 1, 1);
7873 #endif  // CONFIG_VP9_HIGHBITDEPTH
7874
7875 #if CONFIG_VP9_TEMPORAL_DENOISING
7876   setup_denoiser_buffer(cpi);
7877 #endif
7878   alloc_raw_frame_buffers(cpi);
7879   if (width) {
7880     cm->width = width;
7881     if (cm->width > cpi->initial_width) {
7882       cm->width = cpi->initial_width;
7883       printf("Warning: Desired width too large, changed to %d\n", cm->width);
7884     }
7885   }
7886
7887   if (height) {
7888     cm->height = height;
7889     if (cm->height > cpi->initial_height) {
7890       cm->height = cpi->initial_height;
7891       printf("Warning: Desired height too large, changed to %d\n", cm->height);
7892     }
7893   }
7894   assert(cm->width <= cpi->initial_width);
7895   assert(cm->height <= cpi->initial_height);
7896
7897   update_frame_size(cpi);
7898
7899   return 0;
7900 }
7901
7902 void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
7903   cpi->use_svc = use_svc;
7904   return;
7905 }
7906
7907 int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
7908
7909 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
7910   if (flags &
7911       (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) {
7912     int ref = 7;
7913
7914     if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG;
7915
7916     if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG;
7917
7918     if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG;
7919
7920     vp9_use_as_reference(cpi, ref);
7921   }
7922
7923   if (flags &
7924       (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
7925        VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) {
7926     int upd = 7;
7927
7928     if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG;
7929
7930     if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
7931
7932     if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG;
7933
7934     vp9_update_reference(cpi, upd);
7935   }
7936
7937   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
7938     vp9_update_entropy(cpi, 0);
7939   }
7940 }
7941
7942 void vp9_set_row_mt(VP9_COMP *cpi) {
7943   // Enable row based multi-threading for supported modes of encoding
7944   cpi->row_mt = 0;
7945   if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
7946        cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
7947       cpi->oxcf.row_mt && !cpi->use_svc)
7948     cpi->row_mt = 1;
7949
7950   if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
7951       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
7952       !cpi->use_svc)
7953     cpi->row_mt = 1;
7954
7955   // In realtime mode, enable row based multi-threading for all the speed levels
7956   // where non-rd path is used.
7957   if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) {
7958     cpi->row_mt = 1;
7959   }
7960
7961   if (cpi->row_mt)
7962     cpi->row_mt_bit_exact = 1;
7963   else
7964     cpi->row_mt_bit_exact = 0;
7965 }