granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encoder.c

   1 /*
   2  * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <limits.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_config.h"
  18 #include "./vpx_dsp_rtcd.h"
  19 #include "./vpx_scale_rtcd.h"
  20 #include "vpx_dsp/psnr.h"
  21 #include "vpx_dsp/vpx_dsp_common.h"
  22 #include "vpx_dsp/vpx_filter.h"
  23 #if CONFIG_INTERNAL_STATS
  24 #include "vpx_dsp/ssim.h"
  25 #endif
  26 #include "vpx_ports/mem.h"
  27 #include "vpx_ports/system_state.h"
  28 #include "vpx_ports/vpx_timer.h"
  29 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  30 #include "vpx_util/vpx_debug_util.h"
  31 #endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  32
  33 #include "vp9/common/vp9_alloccommon.h"
  34 #include "vp9/common/vp9_filter.h"
  35 #include "vp9/common/vp9_idct.h"
  36 #if CONFIG_NON_GREEDY_MV
  37 #include "vp9/common/vp9_mvref_common.h"
  38 #endif
  39 #if CONFIG_VP9_POSTPROC
  40 #include "vp9/common/vp9_postproc.h"
  41 #endif
  42 #include "vp9/common/vp9_reconinter.h"
  43 #include "vp9/common/vp9_reconintra.h"
  44 #include "vp9/common/vp9_tile_common.h"
  45 #include "vp9/common/vp9_scan.h"
  46
  47 #if !CONFIG_REALTIME_ONLY
  48 #include "vp9/encoder/vp9_alt_ref_aq.h"
  49 #include "vp9/encoder/vp9_aq_360.h"
  50 #include "vp9/encoder/vp9_aq_complexity.h"
  51 #endif
  52 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  53 #if !CONFIG_REALTIME_ONLY
  54 #include "vp9/encoder/vp9_aq_variance.h"
  55 #endif
  56 #include "vp9/encoder/vp9_bitstream.h"
  57 #if CONFIG_INTERNAL_STATS
  58 #include "vp9/encoder/vp9_blockiness.h"
  59 #endif
  60 #include "vp9/encoder/vp9_context_tree.h"
  61 #include "vp9/encoder/vp9_encodeframe.h"
  62 #include "vp9/encoder/vp9_encodemb.h"
  63 #include "vp9/encoder/vp9_encodemv.h"
  64 #include "vp9/encoder/vp9_encoder.h"
  65 #include "vp9/encoder/vp9_ethread.h"
  66 #include "vp9/encoder/vp9_extend.h"
  67 #include "vp9/encoder/vp9_firstpass.h"
  68 #include "vp9/encoder/vp9_mbgraph.h"
  69 #if CONFIG_NON_GREEDY_MV
  70 #include "vp9/encoder/vp9_mcomp.h"
  71 #endif
  72 #include "vp9/encoder/vp9_multi_thread.h"
  73 #include "vp9/encoder/vp9_noise_estimate.h"
  74 #include "vp9/encoder/vp9_picklpf.h"
  75 #include "vp9/encoder/vp9_ratectrl.h"
  76 #include "vp9/encoder/vp9_rd.h"
  77 #include "vp9/encoder/vp9_resize.h"
  78 #include "vp9/encoder/vp9_segmentation.h"
  79 #include "vp9/encoder/vp9_skin_detection.h"
  80 #include "vp9/encoder/vp9_speed_features.h"
  81 #include "vp9/encoder/vp9_svc_layercontext.h"
  82 #include "vp9/encoder/vp9_temporal_filter.h"
  83 #include "vp9/vp9_cx_iface.h"
  84
  85 #define AM_SEGMENT_ID_INACTIVE 7
  86 #define AM_SEGMENT_ID_ACTIVE 0
  87
  88 // Whether to use high precision mv for altref computation.
  89 #define ALTREF_HIGH_PRECISION_MV 1
  90
  91 // Q threshold for high precision mv. Choose a very high value for now so that
  92 // HIGH_PRECISION is always chosen.
  93 #define HIGH_PRECISION_MV_QTHRESH 200
  94
  95 #define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
  96 #define FRAME_RATE_FACTOR 8
  97
  98 #ifdef OUTPUT_YUV_DENOISED
  99 FILE *yuv_denoised_file = NULL;
 100 #endif
 101 #ifdef OUTPUT_YUV_SKINMAP
 102 static FILE *yuv_skinmap_file = NULL;
 103 #endif
 104 #ifdef OUTPUT_YUV_REC
 105 FILE *yuv_rec_file;
 106 #endif
 107 #ifdef OUTPUT_YUV_SVC_SRC
 108 FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
 109 #endif
 110
 111 #if 0
 112 FILE *framepsnr;
 113 FILE *kf_list;
 114 FILE *keyfile;
 115 #endif
 116
 117 #ifdef ENABLE_KF_DENOISE
 118 // Test condition for spatial denoise of source.
 119 static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 120   VP9_COMMON *const cm = &cpi->common;
 121   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 122
 123   return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) &&
 124          frame_is_intra_only(cm);
 125 }
 126 #endif
 127
 128 #if CONFIG_VP9_HIGHBITDEPTH
 129 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 130                          TX_SIZE tx_size);
 131 #endif
 132 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 133                   TX_SIZE tx_size);
 134
 135 #if !CONFIG_REALTIME_ONLY
 136 // compute adaptive threshold for skip recoding
 137 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
 138   const VP9_COMMON *const cm = &cpi->common;
 139   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 140   const int frame_size = (cm->width * cm->height) >> 10;
 141   const int bitrate = (int)(oxcf->target_bandwidth >> 10);
 142   const int qindex_factor = cm->base_qindex + (MAXQ >> 1);
 143
 144   // This equation makes the threshold adaptive to frame size.
 145   // Coding gain obtained by recoding comes from alternate frames of large
 146   // content change. We skip recoding if the difference of previous and current
 147   // frame context probability model is less than a certain threshold.
 148   // The first component is the most critical part to guarantee adaptivity.
 149   // Other parameters are estimated based on normal setting of hd resolution
 150   // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50
 151   const int thresh =
 152       ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) *
 153        qindex_factor) >>
 154       9;
 155
 156   return thresh;
 157 }
 158
 159 // compute the total cost difference between current
 160 // and previous frame context prob model.
 161 static int compute_context_model_diff(const VP9_COMMON *const cm) {
 162   const FRAME_CONTEXT *const pre_fc =
 163       &cm->frame_contexts[cm->frame_context_idx];
 164   const FRAME_CONTEXT *const cur_fc = cm->fc;
 165   const FRAME_COUNTS *counts = &cm->counts;
 166   vpx_prob pre_last_prob, cur_last_prob;
 167   int diff = 0;
 168   int i, j, k, l, m, n;
 169
 170   // y_mode_prob
 171   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
 172     for (j = 0; j < INTRA_MODES - 1; ++j) {
 173       diff += (int)counts->y_mode[i][j] *
 174               (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]);
 175     }
 176     pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2];
 177     cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2];
 178
 179     diff += (int)counts->y_mode[i][INTRA_MODES - 1] *
 180             (pre_last_prob - cur_last_prob);
 181   }
 182
 183   // uv_mode_prob
 184   for (i = 0; i < INTRA_MODES; ++i) {
 185     for (j = 0; j < INTRA_MODES - 1; ++j) {
 186       diff += (int)counts->uv_mode[i][j] *
 187               (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]);
 188     }
 189     pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2];
 190     cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2];
 191
 192     diff += (int)counts->uv_mode[i][INTRA_MODES - 1] *
 193             (pre_last_prob - cur_last_prob);
 194   }
 195
 196   // partition_prob
 197   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
 198     for (j = 0; j < PARTITION_TYPES - 1; ++j) {
 199       diff += (int)counts->partition[i][j] *
 200               (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]);
 201     }
 202     pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2];
 203     cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2];
 204
 205     diff += (int)counts->partition[i][PARTITION_TYPES - 1] *
 206             (pre_last_prob - cur_last_prob);
 207   }
 208
 209   // coef_probs
 210   for (i = 0; i < TX_SIZES; ++i) {
 211     for (j = 0; j < PLANE_TYPES; ++j) {
 212       for (k = 0; k < REF_TYPES; ++k) {
 213         for (l = 0; l < COEF_BANDS; ++l) {
 214           for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) {
 215             for (n = 0; n < UNCONSTRAINED_NODES; ++n) {
 216               diff += (int)counts->coef[i][j][k][l][m][n] *
 217                       (pre_fc->coef_probs[i][j][k][l][m][n] -
 218                        cur_fc->coef_probs[i][j][k][l][m][n]);
 219             }
 220
 221             pre_last_prob =
 222                 MAX_PROB -
 223                 pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 224             cur_last_prob =
 225                 MAX_PROB -
 226                 cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 227
 228             diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] *
 229                     (pre_last_prob - cur_last_prob);
 230           }
 231         }
 232       }
 233     }
 234   }
 235
 236   // switchable_interp_prob
 237   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
 238     for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) {
 239       diff += (int)counts->switchable_interp[i][j] *
 240               (pre_fc->switchable_interp_prob[i][j] -
 241                cur_fc->switchable_interp_prob[i][j]);
 242     }
 243     pre_last_prob =
 244         MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 245     cur_last_prob =
 246         MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 247
 248     diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] *
 249             (pre_last_prob - cur_last_prob);
 250   }
 251
 252   // inter_mode_probs
 253   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
 254     for (j = 0; j < INTER_MODES - 1; ++j) {
 255       diff += (int)counts->inter_mode[i][j] *
 256               (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]);
 257     }
 258     pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2];
 259     cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2];
 260
 261     diff += (int)counts->inter_mode[i][INTER_MODES - 1] *
 262             (pre_last_prob - cur_last_prob);
 263   }
 264
 265   // intra_inter_prob
 266   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
 267     diff += (int)counts->intra_inter[i][0] *
 268             (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]);
 269
 270     pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i];
 271     cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i];
 272
 273     diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob);
 274   }
 275
 276   // comp_inter_prob
 277   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
 278     diff += (int)counts->comp_inter[i][0] *
 279             (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]);
 280
 281     pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i];
 282     cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i];
 283
 284     diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob);
 285   }
 286
 287   // single_ref_prob
 288   for (i = 0; i < REF_CONTEXTS; ++i) {
 289     for (j = 0; j < 2; ++j) {
 290       diff += (int)counts->single_ref[i][j][0] *
 291               (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]);
 292
 293       pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j];
 294       cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j];
 295
 296       diff +=
 297           (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob);
 298     }
 299   }
 300
 301   // comp_ref_prob
 302   for (i = 0; i < REF_CONTEXTS; ++i) {
 303     diff += (int)counts->comp_ref[i][0] *
 304             (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]);
 305
 306     pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i];
 307     cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i];
 308
 309     diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob);
 310   }
 311
 312   // tx_probs
 313   for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
 314     // p32x32
 315     for (j = 0; j < TX_SIZES - 1; ++j) {
 316       diff += (int)counts->tx.p32x32[i][j] *
 317               (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]);
 318     }
 319     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 320     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 321
 322     diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] *
 323             (pre_last_prob - cur_last_prob);
 324
 325     // p16x16
 326     for (j = 0; j < TX_SIZES - 2; ++j) {
 327       diff += (int)counts->tx.p16x16[i][j] *
 328               (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]);
 329     }
 330     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 331     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 332
 333     diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] *
 334             (pre_last_prob - cur_last_prob);
 335
 336     // p8x8
 337     for (j = 0; j < TX_SIZES - 3; ++j) {
 338       diff += (int)counts->tx.p8x8[i][j] *
 339               (pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]);
 340     }
 341     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 342     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 343
 344     diff +=
 345         (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob);
 346   }
 347
 348   // skip_probs
 349   for (i = 0; i < SKIP_CONTEXTS; ++i) {
 350     diff += (int)counts->skip[i][0] *
 351             (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]);
 352
 353     pre_last_prob = MAX_PROB - pre_fc->skip_probs[i];
 354     cur_last_prob = MAX_PROB - cur_fc->skip_probs[i];
 355
 356     diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob);
 357   }
 358
 359   // mv
 360   for (i = 0; i < MV_JOINTS - 1; ++i) {
 361     diff += (int)counts->mv.joints[i] *
 362             (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]);
 363   }
 364   pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2];
 365   cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2];
 366
 367   diff +=
 368       (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob);
 369
 370   for (i = 0; i < 2; ++i) {
 371     const nmv_component_counts *nmv_count = &counts->mv.comps[i];
 372     const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i];
 373     const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i];
 374
 375     // sign
 376     diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign);
 377
 378     pre_last_prob = MAX_PROB - pre_nmv_prob->sign;
 379     cur_last_prob = MAX_PROB - cur_nmv_prob->sign;
 380
 381     diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob);
 382
 383     // classes
 384     for (j = 0; j < MV_CLASSES - 1; ++j) {
 385       diff += (int)nmv_count->classes[j] *
 386               (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]);
 387     }
 388     pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2];
 389     cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2];
 390
 391     diff += (int)nmv_count->classes[MV_CLASSES - 1] *
 392             (pre_last_prob - cur_last_prob);
 393
 394     // class0
 395     for (j = 0; j < CLASS0_SIZE - 1; ++j) {
 396       diff += (int)nmv_count->class0[j] *
 397               (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]);
 398     }
 399     pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2];
 400     cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2];
 401
 402     diff += (int)nmv_count->class0[CLASS0_SIZE - 1] *
 403             (pre_last_prob - cur_last_prob);
 404
 405     // bits
 406     for (j = 0; j < MV_OFFSET_BITS; ++j) {
 407       diff += (int)nmv_count->bits[j][0] *
 408               (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]);
 409
 410       pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j];
 411       cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j];
 412
 413       diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob);
 414     }
 415
 416     // class0_fp
 417     for (j = 0; j < CLASS0_SIZE; ++j) {
 418       for (k = 0; k < MV_FP_SIZE - 1; ++k) {
 419         diff += (int)nmv_count->class0_fp[j][k] *
 420                 (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]);
 421       }
 422       pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 423       cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 424
 425       diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] *
 426               (pre_last_prob - cur_last_prob);
 427     }
 428
 429     // fp
 430     for (j = 0; j < MV_FP_SIZE - 1; ++j) {
 431       diff +=
 432           (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]);
 433     }
 434     pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2];
 435     cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2];
 436
 437     diff +=
 438         (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob);
 439
 440     // class0_hp
 441     diff += (int)nmv_count->class0_hp[0] *
 442             (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp);
 443
 444     pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp;
 445     cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp;
 446
 447     diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob);
 448
 449     // hp
 450     diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp);
 451
 452     pre_last_prob = MAX_PROB - pre_nmv_prob->hp;
 453     cur_last_prob = MAX_PROB - cur_nmv_prob->hp;
 454
 455     diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob);
 456   }
 457
 458   return -diff;
 459 }
 460 #endif  // !CONFIG_REALTIME_ONLY
 461
 462 // Test for whether to calculate metrics for the frame.
 463 static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
 464   const VP9_COMMON *const cm = &cpi->common;
 465   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 466
 467   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
 468 }
 469
 470 /* clang-format off */
 471 const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
 472   //         sample rate    size   breadth  bitrate  cpb
 473   { LEVEL_1,   829440,      36864,    512,   200,    400,    2, 1,  4,  8 },
 474   { LEVEL_1_1, 2764800,     73728,    768,   800,    1000,   2, 1,  4,  8 },
 475   { LEVEL_2,   4608000,     122880,   960,   1800,   1500,   2, 1,  4,  8 },
 476   { LEVEL_2_1, 9216000,     245760,   1344,  3600,   2800,   2, 2,  4,  8 },
 477   { LEVEL_3,   20736000,    552960,   2048,  7200,   6000,   2, 4,  4,  8 },
 478   { LEVEL_3_1, 36864000,    983040,   2752,  12000,  10000,  2, 4,  4,  8 },
 479   { LEVEL_4,   83558400,    2228224,  4160,  18000,  16000,  4, 4,  4,  8 },
 480   { LEVEL_4_1, 160432128,   2228224,  4160,  30000,  18000,  4, 4,  5,  6 },
 481   { LEVEL_5,   311951360,   8912896,  8384,  60000,  36000,  6, 8,  6,  4 },
 482   { LEVEL_5_1, 588251136,   8912896,  8384,  120000, 46000,  8, 8,  10, 4 },
 483   // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
 484   // they are finalized (currently tentative).
 485   { LEVEL_5_2, 1176502272,  8912896,  8384,  180000, 90000,  8, 8,  10, 4 },
 486   { LEVEL_6,   1176502272,  35651584, 16832, 180000, 90000,  8, 16, 10, 4 },
 487   { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
 488   { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
 489 };
 490 /* clang-format on */
 491
 492 static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
 493   "The average bit-rate is too high.",
 494   "The picture size is too large.",
 495   "The picture width/height is too large.",
 496   "The luma sample rate is too large.",
 497   "The CPB size is too large.",
 498   "The compression ratio is too small",
 499   "Too many column tiles are used.",
 500   "The alt-ref distance is too small.",
 501   "Too many reference buffers are used."
 502 };
 503
 504 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
 505   switch (mode) {
 506     case NORMAL:
 507       *hr = 1;
 508       *hs = 1;
 509       break;
 510     case FOURFIVE:
 511       *hr = 4;
 512       *hs = 5;
 513       break;
 514     case THREEFIVE:
 515       *hr = 3;
 516       *hs = 5;
 517       break;
 518     default:
 519       assert(mode == ONETWO);
 520       *hr = 1;
 521       *hs = 2;
 522       break;
 523   }
 524 }
 525
 526 // Mark all inactive blocks as active. Other segmentation features may be set
 527 // so memset cannot be used, instead only inactive blocks should be reset.
 528 static void suppress_active_map(VP9_COMP *cpi) {
 529   unsigned char *const seg_map = cpi->segmentation_map;
 530
 531   if (cpi->active_map.enabled || cpi->active_map.update) {
 532     const int rows = cpi->common.mi_rows;
 533     const int cols = cpi->common.mi_cols;
 534     int i;
 535
 536     for (i = 0; i < rows * cols; ++i)
 537       if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
 538         seg_map[i] = AM_SEGMENT_ID_ACTIVE;
 539   }
 540 }
 541
 542 static void apply_active_map(VP9_COMP *cpi) {
 543   struct segmentation *const seg = &cpi->common.seg;
 544   unsigned char *const seg_map = cpi->segmentation_map;
 545   const unsigned char *const active_map = cpi->active_map.map;
 546   int i;
 547
 548   assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
 549
 550   if (frame_is_intra_only(&cpi->common)) {
 551     cpi->active_map.enabled = 0;
 552     cpi->active_map.update = 1;
 553   }
 554
 555   if (cpi->active_map.update) {
 556     if (cpi->active_map.enabled) {
 557       for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
 558         if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
 559       vp9_enable_segmentation(seg);
 560       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 561       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 562       // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
 563       // filter level being zero regardless of the value of seg->abs_delta.
 564       vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
 565                       -MAX_LOOP_FILTER);
 566     } else {
 567       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 568       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 569       if (seg->enabled) {
 570         seg->update_data = 1;
 571         seg->update_map = 1;
 572       }
 573     }
 574     cpi->active_map.update = 0;
 575   }
 576 }
 577
 578 static void apply_roi_map(VP9_COMP *cpi) {
 579   VP9_COMMON *cm = &cpi->common;
 580   struct segmentation *const seg = &cm->seg;
 581   vpx_roi_map_t *roi = &cpi->roi;
 582   const int *delta_q = roi->delta_q;
 583   const int *delta_lf = roi->delta_lf;
 584   const int *skip = roi->skip;
 585   int ref_frame[8];
 586   int internal_delta_q[MAX_SEGMENTS];
 587   int i;
 588   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
 589                                     VP9_ALT_FLAG };
 590
 591   // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
 592   // realtime mode.
 593   if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
 594   if (!roi->enabled) return;
 595
 596   memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
 597
 598   vp9_enable_segmentation(seg);
 599   vp9_clearall_segfeatures(seg);
 600   // Select delta coding method;
 601   seg->abs_delta = SEGMENT_DELTADATA;
 602
 603   memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
 604
 605   for (i = 0; i < MAX_SEGMENTS; ++i) {
 606     // Translate the external delta q values to internal values.
 607     internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
 608     if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
 609     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
 610     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
 611     if (internal_delta_q[i] != 0) {
 612       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
 613       vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
 614     }
 615     if (delta_lf[i] != 0) {
 616       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
 617       vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
 618     }
 619     if (skip[i] != 0) {
 620       vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
 621       vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
 622     }
 623     if (ref_frame[i] >= 0) {
 624       int valid_ref = 1;
 625       // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
 626       if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
 627         valid_ref = 0;
 628       // If GOLDEN is selected, make sure it's set as reference.
 629       if (ref_frame[i] == GOLDEN_FRAME &&
 630           !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
 631         valid_ref = 0;
 632       }
 633       // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
 634       // same reference.
 635       if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
 636         ref_frame[i] = LAST_FRAME;
 637       if (valid_ref) {
 638         vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
 639         vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
 640       }
 641     }
 642   }
 643   roi->enabled = 1;
 644 }
 645
 646 static void init_level_info(Vp9LevelInfo *level_info) {
 647   Vp9LevelStats *const level_stats = &level_info->level_stats;
 648   Vp9LevelSpec *const level_spec = &level_info->level_spec;
 649
 650   memset(level_stats, 0, sizeof(*level_stats));
 651   memset(level_spec, 0, sizeof(*level_spec));
 652   level_spec->level = LEVEL_UNKNOWN;
 653   level_spec->min_altref_distance = INT_MAX;
 654 }
 655
 656 static int check_seg_range(int seg_data[8], int range) {
 657   return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
 658            abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
 659            abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
 660            abs(seg_data[6]) > range || abs(seg_data[7]) > range);
 661 }
 662
 663 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
 664   int i;
 665   const Vp9LevelSpec *this_level;
 666
 667   vpx_clear_system_state();
 668
 669   for (i = 0; i < VP9_LEVELS; ++i) {
 670     this_level = &vp9_level_defs[i];
 671     if ((double)level_spec->max_luma_sample_rate >
 672             (double)this_level->max_luma_sample_rate *
 673                 (1 + SAMPLE_RATE_GRACE_P) ||
 674         level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
 675         level_spec->max_luma_picture_breadth >
 676             this_level->max_luma_picture_breadth ||
 677         level_spec->average_bitrate > this_level->average_bitrate ||
 678         level_spec->max_cpb_size > this_level->max_cpb_size ||
 679         level_spec->compression_ratio < this_level->compression_ratio ||
 680         level_spec->max_col_tiles > this_level->max_col_tiles ||
 681         level_spec->min_altref_distance < this_level->min_altref_distance ||
 682         level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
 683       continue;
 684     break;
 685   }
 686   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 687 }
 688
 689 int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
 690                     unsigned int cols, int delta_q[8], int delta_lf[8],
 691                     int skip[8], int ref_frame[8]) {
 692   VP9_COMMON *cm = &cpi->common;
 693   vpx_roi_map_t *roi = &cpi->roi;
 694   const int range = 63;
 695   const int ref_frame_range = 3;  // Alt-ref
 696   const int skip_range = 1;
 697   const int frame_rows = cpi->common.mi_rows;
 698   const int frame_cols = cpi->common.mi_cols;
 699
 700   // Check number of rows and columns match
 701   if (frame_rows != (int)rows || frame_cols != (int)cols) {
 702     return -1;
 703   }
 704
 705   if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
 706       !check_seg_range(ref_frame, ref_frame_range) ||
 707       !check_seg_range(skip, skip_range))
 708     return -1;
 709
 710   // Also disable segmentation if no deltas are specified.
 711   if (!map ||
 712       (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
 713          delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
 714          delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
 715          delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
 716          skip[5] | skip[6] | skip[7]) &&
 717        (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
 718         ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
 719         ref_frame[6] == -1 && ref_frame[7] == -1))) {
 720     vp9_disable_segmentation(&cm->seg);
 721     cpi->roi.enabled = 0;
 722     return 0;
 723   }
 724
 725   if (roi->roi_map) {
 726     vpx_free(roi->roi_map);
 727     roi->roi_map = NULL;
 728   }
 729   CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
 730
 731   // Copy to ROI structure in the compressor.
 732   memcpy(roi->roi_map, map, rows * cols);
 733   memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
 734   memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
 735   memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
 736   memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
 737   roi->enabled = 1;
 738   roi->rows = rows;
 739   roi->cols = cols;
 740
 741   return 0;
 742 }
 743
 744 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 745                        int cols) {
 746   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
 747     unsigned char *const active_map_8x8 = cpi->active_map.map;
 748     const int mi_rows = cpi->common.mi_rows;
 749     const int mi_cols = cpi->common.mi_cols;
 750     cpi->active_map.update = 1;
 751     if (new_map_16x16) {
 752       int r, c;
 753       for (r = 0; r < mi_rows; ++r) {
 754         for (c = 0; c < mi_cols; ++c) {
 755           active_map_8x8[r * mi_cols + c] =
 756               new_map_16x16[(r >> 1) * cols + (c >> 1)]
 757                   ? AM_SEGMENT_ID_ACTIVE
 758                   : AM_SEGMENT_ID_INACTIVE;
 759         }
 760       }
 761       cpi->active_map.enabled = 1;
 762     } else {
 763       cpi->active_map.enabled = 0;
 764     }
 765     return 0;
 766   } else {
 767     return -1;
 768   }
 769 }
 770
 771 int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 772                        int cols) {
 773   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
 774       new_map_16x16) {
 775     unsigned char *const seg_map_8x8 = cpi->segmentation_map;
 776     const int mi_rows = cpi->common.mi_rows;
 777     const int mi_cols = cpi->common.mi_cols;
 778     memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
 779     if (cpi->active_map.enabled) {
 780       int r, c;
 781       for (r = 0; r < mi_rows; ++r) {
 782         for (c = 0; c < mi_cols; ++c) {
 783           // Cyclic refresh segments are considered active despite not having
 784           // AM_SEGMENT_ID_ACTIVE
 785           new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
 786               seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
 787         }
 788       }
 789     }
 790     return 0;
 791   } else {
 792     return -1;
 793   }
 794 }
 795
 796 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
 797   MACROBLOCK *const mb = &cpi->td.mb;
 798   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
 799   if (cpi->common.allow_high_precision_mv) {
 800     mb->mvcost = mb->nmvcost_hp;
 801     mb->mvsadcost = mb->nmvsadcost_hp;
 802   } else {
 803     mb->mvcost = mb->nmvcost;
 804     mb->mvsadcost = mb->nmvsadcost;
 805   }
 806 }
 807
 808 static void setup_frame(VP9_COMP *cpi) {
 809   VP9_COMMON *const cm = &cpi->common;
 810   // Set up entropy context depending on frame type. The decoder mandates
 811   // the use of the default context, index 0, for keyframes and inter
 812   // frames where the error_resilient_mode or intra_only flag is set. For
 813   // other inter-frames the encoder currently uses only two contexts;
 814   // context 1 for ALTREF frames and context 0 for the others.
 815   if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
 816     vp9_setup_past_independence(cm);
 817   } else {
 818     if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
 819   }
 820
 821   // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF
 822   // case. Need some further investigation on if we could apply this to single
 823   // layer ARF case as well.
 824   if (cpi->multi_layer_arf && !cpi->use_svc) {
 825     GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 826     const int gf_group_index = gf_group->index;
 827     const int boost_frame =
 828         !cpi->rc.is_src_frame_alt_ref &&
 829         (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
 830
 831     // frame_context_idx           Frame Type
 832     //        0              Intra only frame, base layer ARF
 833     //        1              ARFs with layer depth = 2,3
 834     //        2              ARFs with layer depth > 3
 835     //        3              Non-boosted frames
 836     if (frame_is_intra_only(cm)) {
 837       cm->frame_context_idx = 0;
 838     } else if (boost_frame) {
 839       if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
 840         cm->frame_context_idx = 0;
 841       else if (gf_group->layer_depth[gf_group_index] <= 3)
 842         cm->frame_context_idx = 1;
 843       else
 844         cm->frame_context_idx = 2;
 845     } else {
 846       cm->frame_context_idx = 3;
 847     }
 848   }
 849
 850   if (cm->frame_type == KEY_FRAME) {
 851     cpi->refresh_golden_frame = 1;
 852     cpi->refresh_alt_ref_frame = 1;
 853     vp9_zero(cpi->interp_filter_selected);
 854   } else {
 855     *cm->fc = cm->frame_contexts[cm->frame_context_idx];
 856     vp9_zero(cpi->interp_filter_selected[0]);
 857   }
 858 }
 859
 860 static void vp9_enc_setup_mi(VP9_COMMON *cm) {
 861   int i;
 862   cm->mi = cm->mip + cm->mi_stride + 1;
 863   memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
 864   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 865   // Clear top border row
 866   memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
 867   // Clear left border column
 868   for (i = 1; i < cm->mi_rows + 1; ++i)
 869     memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
 870
 871   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 872   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 873
 874   memset(cm->mi_grid_base, 0,
 875          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 876 }
 877
 878 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
 879   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
 880   if (!cm->mip) return 1;
 881   cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
 882   if (!cm->prev_mip) return 1;
 883   cm->mi_alloc_size = mi_size;
 884
 885   cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 886   if (!cm->mi_grid_base) return 1;
 887   cm->prev_mi_grid_base =
 888       (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 889   if (!cm->prev_mi_grid_base) return 1;
 890
 891   return 0;
 892 }
 893
 894 static void vp9_enc_free_mi(VP9_COMMON *cm) {
 895   vpx_free(cm->mip);
 896   cm->mip = NULL;
 897   vpx_free(cm->prev_mip);
 898   cm->prev_mip = NULL;
 899   vpx_free(cm->mi_grid_base);
 900   cm->mi_grid_base = NULL;
 901   vpx_free(cm->prev_mi_grid_base);
 902   cm->prev_mi_grid_base = NULL;
 903   cm->mi_alloc_size = 0;
 904 }
 905
 906 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
 907   // Current mip will be the prev_mip for the next frame.
 908   MODE_INFO **temp_base = cm->prev_mi_grid_base;
 909   MODE_INFO *temp = cm->prev_mip;
 910
 911   // Skip update prev_mi frame in show_existing_frame mode.
 912   if (cm->show_existing_frame) return;
 913
 914   cm->prev_mip = cm->mip;
 915   cm->mip = temp;
 916
 917   // Update the upper left visible macroblock ptrs.
 918   cm->mi = cm->mip + cm->mi_stride + 1;
 919   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 920
 921   cm->prev_mi_grid_base = cm->mi_grid_base;
 922   cm->mi_grid_base = temp_base;
 923   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 924   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 925 }
 926
 927 void vp9_initialize_enc(void) {
 928   static volatile int init_done = 0;
 929
 930   if (!init_done) {
 931     vp9_rtcd();
 932     vpx_dsp_rtcd();
 933     vpx_scale_rtcd();
 934     vp9_init_intra_predictors();
 935     vp9_init_me_luts();
 936     vp9_rc_init_minq_luts();
 937     vp9_entropy_mv_init();
 938 #if !CONFIG_REALTIME_ONLY
 939     vp9_temporal_filter_init();
 940 #endif
 941     init_done = 1;
 942   }
 943 }
 944
 945 static void dealloc_compressor_data(VP9_COMP *cpi) {
 946   VP9_COMMON *const cm = &cpi->common;
 947   int i;
 948
 949   vpx_free(cpi->mbmi_ext_base);
 950   cpi->mbmi_ext_base = NULL;
 951
 952   vpx_free(cpi->tile_data);
 953   cpi->tile_data = NULL;
 954
 955   vpx_free(cpi->segmentation_map);
 956   cpi->segmentation_map = NULL;
 957   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
 958   cpi->coding_context.last_frame_seg_map_copy = NULL;
 959
 960   vpx_free(cpi->nmvcosts[0]);
 961   vpx_free(cpi->nmvcosts[1]);
 962   cpi->nmvcosts[0] = NULL;
 963   cpi->nmvcosts[1] = NULL;
 964
 965   vpx_free(cpi->nmvcosts_hp[0]);
 966   vpx_free(cpi->nmvcosts_hp[1]);
 967   cpi->nmvcosts_hp[0] = NULL;
 968   cpi->nmvcosts_hp[1] = NULL;
 969
 970   vpx_free(cpi->nmvsadcosts[0]);
 971   vpx_free(cpi->nmvsadcosts[1]);
 972   cpi->nmvsadcosts[0] = NULL;
 973   cpi->nmvsadcosts[1] = NULL;
 974
 975   vpx_free(cpi->nmvsadcosts_hp[0]);
 976   vpx_free(cpi->nmvsadcosts_hp[1]);
 977   cpi->nmvsadcosts_hp[0] = NULL;
 978   cpi->nmvsadcosts_hp[1] = NULL;
 979
 980   vpx_free(cpi->skin_map);
 981   cpi->skin_map = NULL;
 982
 983   vpx_free(cpi->prev_partition);
 984   cpi->prev_partition = NULL;
 985
 986   vpx_free(cpi->svc.prev_partition_svc);
 987   cpi->svc.prev_partition_svc = NULL;
 988
 989   vpx_free(cpi->prev_segment_id);
 990   cpi->prev_segment_id = NULL;
 991
 992   vpx_free(cpi->prev_variance_low);
 993   cpi->prev_variance_low = NULL;
 994
 995   vpx_free(cpi->copied_frame_cnt);
 996   cpi->copied_frame_cnt = NULL;
 997
 998   vpx_free(cpi->content_state_sb_fd);
 999   cpi->content_state_sb_fd = NULL;
1000
1001   vpx_free(cpi->count_arf_frame_usage);
1002   cpi->count_arf_frame_usage = NULL;
1003   vpx_free(cpi->count_lastgolden_frame_usage);
1004   cpi->count_lastgolden_frame_usage = NULL;
1005
1006   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1007   cpi->cyclic_refresh = NULL;
1008
1009   vpx_free(cpi->active_map.map);
1010   cpi->active_map.map = NULL;
1011
1012   vpx_free(cpi->roi.roi_map);
1013   cpi->roi.roi_map = NULL;
1014
1015   vpx_free(cpi->consec_zero_mv);
1016   cpi->consec_zero_mv = NULL;
1017
1018   vpx_free(cpi->mb_wiener_variance);
1019   cpi->mb_wiener_variance = NULL;
1020
1021   vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
1022   cpi->mi_ssim_rdmult_scaling_factors = NULL;
1023
1024 #if CONFIG_RATE_CTRL
1025   free_partition_info(cpi);
1026   free_motion_vector_info(cpi);
1027   free_fp_motion_vector_info(cpi);
1028 #endif
1029
1030   vp9_free_ref_frame_buffers(cm->buffer_pool);
1031 #if CONFIG_VP9_POSTPROC
1032   vp9_free_postproc_buffers(cm);
1033 #endif
1034   vp9_free_context_buffers(cm);
1035
1036   vpx_free_frame_buffer(&cpi->last_frame_uf);
1037   vpx_free_frame_buffer(&cpi->scaled_source);
1038   vpx_free_frame_buffer(&cpi->scaled_last_source);
1039   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
1040 #ifdef ENABLE_KF_DENOISE
1041   vpx_free_frame_buffer(&cpi->raw_unscaled_source);
1042   vpx_free_frame_buffer(&cpi->raw_scaled_source);
1043 #endif
1044
1045   vp9_lookahead_destroy(cpi->lookahead);
1046
1047   vpx_free(cpi->tile_tok[0][0]);
1048   cpi->tile_tok[0][0] = 0;
1049
1050   vpx_free(cpi->tplist[0][0]);
1051   cpi->tplist[0][0] = NULL;
1052
1053   vp9_free_pc_tree(&cpi->td);
1054
1055   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
1056     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
1057     vpx_free(lc->rc_twopass_stats_in.buf);
1058     lc->rc_twopass_stats_in.buf = NULL;
1059     lc->rc_twopass_stats_in.sz = 0;
1060   }
1061
1062   if (cpi->source_diff_var != NULL) {
1063     vpx_free(cpi->source_diff_var);
1064     cpi->source_diff_var = NULL;
1065   }
1066
1067   for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
1068     vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
1069   }
1070   memset(&cpi->svc.scaled_frames[0], 0,
1071          MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
1072
1073   vpx_free_frame_buffer(&cpi->svc.scaled_temp);
1074   memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
1075
1076   vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
1077   memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
1078
1079   vp9_free_svc_cyclic_refresh(cpi);
1080 }
1081
1082 static void save_coding_context(VP9_COMP *cpi) {
1083   CODING_CONTEXT *const cc = &cpi->coding_context;
1084   VP9_COMMON *cm = &cpi->common;
1085
1086   // Stores a snapshot of key state variables which can subsequently be
1087   // restored with a call to vp9_restore_coding_context. These functions are
1088   // intended for use in a re-code loop in vp9_compress_frame where the
1089   // quantizer value is adjusted between loop iterations.
1090   vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
1091
1092   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
1093          MV_VALS * sizeof(*cpi->nmvcosts[0]));
1094   memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
1095          MV_VALS * sizeof(*cpi->nmvcosts[1]));
1096   memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
1097          MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
1098   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
1099          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
1100
1101   vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
1102
1103   memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
1104          (cm->mi_rows * cm->mi_cols));
1105
1106   vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
1107   vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
1108
1109   cc->fc = *cm->fc;
1110 }
1111
1112 static void restore_coding_context(VP9_COMP *cpi) {
1113   CODING_CONTEXT *const cc = &cpi->coding_context;
1114   VP9_COMMON *cm = &cpi->common;
1115
1116   // Restore key state variables to the snapshot state stored in the
1117   // previous call to vp9_save_coding_context.
1118   vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
1119
1120   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
1121   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
1122   memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
1123          MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
1124   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
1125          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
1126
1127   vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
1128
1129   memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
1130          (cm->mi_rows * cm->mi_cols));
1131
1132   vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
1133   vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
1134
1135   *cm->fc = cc->fc;
1136 }
1137
1138 #if !CONFIG_REALTIME_ONLY
1139 static void configure_static_seg_features(VP9_COMP *cpi) {
1140   VP9_COMMON *const cm = &cpi->common;
1141   const RATE_CONTROL *const rc = &cpi->rc;
1142   struct segmentation *const seg = &cm->seg;
1143
1144   int high_q = (int)(rc->avg_q > 48.0);
1145   int qi_delta;
1146
1147   // Disable and clear down for KF
1148   if (cm->frame_type == KEY_FRAME) {
1149     // Clear down the global segmentation map
1150     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1151     seg->update_map = 0;
1152     seg->update_data = 0;
1153     cpi->static_mb_pct = 0;
1154
1155     // Disable segmentation
1156     vp9_disable_segmentation(seg);
1157
1158     // Clear down the segment features.
1159     vp9_clearall_segfeatures(seg);
1160   } else if (cpi->refresh_alt_ref_frame) {
1161     // If this is an alt ref frame
1162     // Clear down the global segmentation map
1163     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1164     seg->update_map = 0;
1165     seg->update_data = 0;
1166     cpi->static_mb_pct = 0;
1167
1168     // Disable segmentation and individual segment features by default
1169     vp9_disable_segmentation(seg);
1170     vp9_clearall_segfeatures(seg);
1171
1172     // Scan frames from current to arf frame.
1173     // This function re-enables segmentation if appropriate.
1174     vp9_update_mbgraph_stats(cpi);
1175
1176     // If segmentation was enabled set those features needed for the
1177     // arf itself.
1178     if (seg->enabled) {
1179       seg->update_map = 1;
1180       seg->update_data = 1;
1181
1182       qi_delta =
1183           vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
1184       vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
1185       vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1186
1187       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1188       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1189
1190       // Where relevant assume segment data is delta data
1191       seg->abs_delta = SEGMENT_DELTADATA;
1192     }
1193   } else if (seg->enabled) {
1194     // All other frames if segmentation has been enabled
1195
1196     // First normal frame in a valid gf or alt ref group
1197     if (rc->frames_since_golden == 0) {
1198       // Set up segment features for normal frames in an arf group
1199       if (rc->source_alt_ref_active) {
1200         seg->update_map = 0;
1201         seg->update_data = 1;
1202         seg->abs_delta = SEGMENT_DELTADATA;
1203
1204         qi_delta =
1205             vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth);
1206         vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
1207         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1208
1209         vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1210         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1211
1212         // Segment coding disabled for compred testing
1213         if (high_q || (cpi->static_mb_pct == 100)) {
1214           vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1215           vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1216           vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1217         }
1218       } else {
1219         // Disable segmentation and clear down features if alt ref
1220         // is not active for this group
1221
1222         vp9_disable_segmentation(seg);
1223
1224         memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1225
1226         seg->update_map = 0;
1227         seg->update_data = 0;
1228
1229         vp9_clearall_segfeatures(seg);
1230       }
1231     } else if (rc->is_src_frame_alt_ref) {
1232       // Special case where we are coding over the top of a previous
1233       // alt ref frame.
1234       // Segment coding disabled for compred testing
1235
1236       // Enable ref frame features for segment 0 as well
1237       vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
1238       vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1239
1240       // All mbs should use ALTREF_FRAME
1241       vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
1242       vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1243       vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
1244       vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1245
1246       // Skip all MBs if high Q (0,0 mv and skip coeffs)
1247       if (high_q) {
1248         vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
1249         vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1250       }
1251       // Enable data update
1252       seg->update_data = 1;
1253     } else {
1254       // All other frames.
1255
1256       // No updates.. leave things as they are.
1257       seg->update_map = 0;
1258       seg->update_data = 0;
1259     }
1260   }
1261 }
1262 #endif  // !CONFIG_REALTIME_ONLY
1263
1264 static void update_reference_segmentation_map(VP9_COMP *cpi) {
1265   VP9_COMMON *const cm = &cpi->common;
1266   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
1267   uint8_t *cache_ptr = cm->last_frame_seg_map;
1268   int row, col;
1269
1270   for (row = 0; row < cm->mi_rows; row++) {
1271     MODE_INFO **mi_8x8 = mi_8x8_ptr;
1272     uint8_t *cache = cache_ptr;
1273     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
1274       cache[0] = mi_8x8[0]->segment_id;
1275     mi_8x8_ptr += cm->mi_stride;
1276     cache_ptr += cm->mi_cols;
1277   }
1278 }
1279
1280 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
1281   VP9_COMMON *cm = &cpi->common;
1282   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1283
1284   if (!cpi->lookahead)
1285     cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
1286                                         cm->subsampling_x, cm->subsampling_y,
1287 #if CONFIG_VP9_HIGHBITDEPTH
1288                                         cm->use_highbitdepth,
1289 #endif
1290                                         oxcf->lag_in_frames);
1291   if (!cpi->lookahead)
1292     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1293                        "Failed to allocate lag buffers");
1294
1295   // TODO(agrange) Check if ARF is enabled and skip allocation if not.
1296   if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
1297                                cm->subsampling_x, cm->subsampling_y,
1298 #if CONFIG_VP9_HIGHBITDEPTH
1299                                cm->use_highbitdepth,
1300 #endif
1301                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1302                                NULL, NULL, NULL))
1303     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1304                        "Failed to allocate altref buffer");
1305 }
1306
1307 static void alloc_util_frame_buffers(VP9_COMP *cpi) {
1308   VP9_COMMON *const cm = &cpi->common;
1309   if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
1310                                cm->subsampling_x, cm->subsampling_y,
1311 #if CONFIG_VP9_HIGHBITDEPTH
1312                                cm->use_highbitdepth,
1313 #endif
1314                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1315                                NULL, NULL, NULL))
1316     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1317                        "Failed to allocate last frame buffer");
1318
1319   if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
1320                                cm->subsampling_x, cm->subsampling_y,
1321 #if CONFIG_VP9_HIGHBITDEPTH
1322                                cm->use_highbitdepth,
1323 #endif
1324                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1325                                NULL, NULL, NULL))
1326     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1327                        "Failed to allocate scaled source buffer");
1328
1329   // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
1330   // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
1331   // target of 1/4x1/4. number_spatial_layers must be greater than 2.
1332   if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
1333       cpi->svc.number_spatial_layers > 2) {
1334     cpi->svc.scaled_temp_is_alloc = 1;
1335     if (vpx_realloc_frame_buffer(
1336             &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
1337             cm->subsampling_x, cm->subsampling_y,
1338 #if CONFIG_VP9_HIGHBITDEPTH
1339             cm->use_highbitdepth,
1340 #endif
1341             VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
1342       vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
1343                          "Failed to allocate scaled_frame for svc ");
1344   }
1345
1346   if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
1347                                cm->subsampling_x, cm->subsampling_y,
1348 #if CONFIG_VP9_HIGHBITDEPTH
1349                                cm->use_highbitdepth,
1350 #endif
1351                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1352                                NULL, NULL, NULL))
1353     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1354                        "Failed to allocate scaled last source buffer");
1355 #ifdef ENABLE_KF_DENOISE
1356   if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height,
1357                                cm->subsampling_x, cm->subsampling_y,
1358 #if CONFIG_VP9_HIGHBITDEPTH
1359                                cm->use_highbitdepth,
1360 #endif
1361                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1362                                NULL, NULL, NULL))
1363     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1364                        "Failed to allocate unscaled raw source frame buffer");
1365
1366   if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height,
1367                                cm->subsampling_x, cm->subsampling_y,
1368 #if CONFIG_VP9_HIGHBITDEPTH
1369                                cm->use_highbitdepth,
1370 #endif
1371                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1372                                NULL, NULL, NULL))
1373     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1374                        "Failed to allocate scaled raw source frame buffer");
1375 #endif
1376 }
1377
1378 static int alloc_context_buffers_ext(VP9_COMP *cpi) {
1379   VP9_COMMON *cm = &cpi->common;
1380   int mi_size = cm->mi_cols * cm->mi_rows;
1381
1382   cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
1383   if (!cpi->mbmi_ext_base) return 1;
1384
1385   return 0;
1386 }
1387
1388 static void alloc_compressor_data(VP9_COMP *cpi) {
1389   VP9_COMMON *cm = &cpi->common;
1390   int sb_rows;
1391
1392   vp9_alloc_context_buffers(cm, cm->width, cm->height);
1393
1394   alloc_context_buffers_ext(cpi);
1395
1396   vpx_free(cpi->tile_tok[0][0]);
1397
1398   {
1399     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
1400     CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
1401                     vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
1402   }
1403
1404   sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
1405   vpx_free(cpi->tplist[0][0]);
1406   CHECK_MEM_ERROR(
1407       cm, cpi->tplist[0][0],
1408       vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
1409
1410   vp9_setup_pc_tree(&cpi->common, &cpi->td);
1411 }
1412
1413 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
1414   cpi->framerate = framerate < 0.1 ? 30 : framerate;
1415   vp9_rc_update_framerate(cpi);
1416 }
1417
1418 static void set_tile_limits(VP9_COMP *cpi) {
1419   VP9_COMMON *const cm = &cpi->common;
1420
1421   int min_log2_tile_cols, max_log2_tile_cols;
1422   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
1423
1424   cm->log2_tile_cols =
1425       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
1426   cm->log2_tile_rows = cpi->oxcf.tile_rows;
1427
1428   if (cpi->oxcf.target_level == LEVEL_AUTO) {
1429     const int level_tile_cols =
1430         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
1431     if (cm->log2_tile_cols > level_tile_cols) {
1432       cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
1433     }
1434   }
1435 }
1436
1437 static void update_frame_size(VP9_COMP *cpi) {
1438   VP9_COMMON *const cm = &cpi->common;
1439   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
1440
1441   vp9_set_mb_mi(cm, cm->width, cm->height);
1442   vp9_init_context_buffers(cm);
1443   vp9_init_macroblockd(cm, xd, NULL);
1444   cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
1445   memset(cpi->mbmi_ext_base, 0,
1446          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
1447
1448   set_tile_limits(cpi);
1449 }
1450
1451 static void init_buffer_indices(VP9_COMP *cpi) {
1452   int ref_frame;
1453
1454   for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1455     cpi->ref_fb_idx[ref_frame] = ref_frame;
1456
1457   cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
1458   cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
1459   cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
1460 }
1461
1462 static void init_level_constraint(LevelConstraint *lc) {
1463   lc->level_index = -1;
1464   lc->max_cpb_size = INT_MAX;
1465   lc->max_frame_size = INT_MAX;
1466   lc->fail_flag = 0;
1467 }
1468
1469 static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
1470   vpx_clear_system_state();
1471   ls->level_index = level_index;
1472   if (level_index >= 0) {
1473     ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000;
1474   }
1475 }
1476
1477 static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1478   VP9_COMMON *const cm = &cpi->common;
1479
1480   cpi->oxcf = *oxcf;
1481   cpi->framerate = oxcf->init_framerate;
1482   cm->profile = oxcf->profile;
1483   cm->bit_depth = oxcf->bit_depth;
1484 #if CONFIG_VP9_HIGHBITDEPTH
1485   cm->use_highbitdepth = oxcf->use_highbitdepth;
1486 #endif
1487   cm->color_space = oxcf->color_space;
1488   cm->color_range = oxcf->color_range;
1489
1490   cpi->target_level = oxcf->target_level;
1491   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1492   set_level_constraint(&cpi->level_constraint,
1493                        get_level_index(cpi->target_level));
1494
1495   cm->width = oxcf->width;
1496   cm->height = oxcf->height;
1497   alloc_compressor_data(cpi);
1498
1499   cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
1500
1501   // Single thread case: use counts in common.
1502   cpi->td.counts = &cm->counts;
1503
1504   // Spatial scalability.
1505   cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
1506   // Temporal scalability.
1507   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
1508
1509   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
1510       ((cpi->svc.number_temporal_layers > 1 ||
1511         cpi->svc.number_spatial_layers > 1) &&
1512        cpi->oxcf.pass != 1)) {
1513     vp9_init_layer_context(cpi);
1514   }
1515
1516   // change includes all joint functionality
1517   vp9_change_config(cpi, oxcf);
1518
1519   cpi->static_mb_pct = 0;
1520   cpi->ref_frame_flags = 0;
1521
1522   init_buffer_indices(cpi);
1523
1524   vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
1525 }
1526
1527 void vp9_check_reset_rc_flag(VP9_COMP *cpi) {
1528   RATE_CONTROL *rc = &cpi->rc;
1529
1530   if (cpi->common.current_video_frame >
1531       (unsigned int)cpi->svc.number_spatial_layers) {
1532     if (cpi->use_svc) {
1533       vp9_svc_check_reset_layer_rc_flag(cpi);
1534     } else {
1535       if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
1536           rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
1537         rc->rc_1_frame = 0;
1538         rc->rc_2_frame = 0;
1539         rc->bits_off_target = rc->optimal_buffer_level;
1540         rc->buffer_level = rc->optimal_buffer_level;
1541       }
1542     }
1543   }
1544 }
1545
1546 void vp9_set_rc_buffer_sizes(VP9_COMP *cpi) {
1547   RATE_CONTROL *rc = &cpi->rc;
1548   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1549
1550   const int64_t bandwidth = oxcf->target_bandwidth;
1551   const int64_t starting = oxcf->starting_buffer_level_ms;
1552   const int64_t optimal = oxcf->optimal_buffer_level_ms;
1553   const int64_t maximum = oxcf->maximum_buffer_size_ms;
1554
1555   rc->starting_buffer_level = starting * bandwidth / 1000;
1556   rc->optimal_buffer_level =
1557       (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
1558   rc->maximum_buffer_size =
1559       (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
1560
1561   // Under a configuration change, where maximum_buffer_size may change,
1562   // keep buffer level clipped to the maximum allowed buffer size.
1563   rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
1564   rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
1565 }
1566
1567 #if CONFIG_VP9_HIGHBITDEPTH
1568 // TODO(angiebird): make sdx8f available for highbitdepth if needed
// Fills the cpi->fn_ptr[] entry selected by BSIZE: the six supplied function
// pointers are stored in the sdf, sdaf, vf, svf, svaf and sdx4df members and
// sdx8f is set to NULL. Requires a variable named `cpi` at the expansion
// site. Expands to a plain sequence of statements (not a single statement),
// so it must not be used as the unbraced body of an if/else or loop.
#define HIGHBD_BFP(BSIZE, SDF_FN, SDAF_FN, VF_FN, SVF_FN, SVAF_FN, SDX4DF_FN) \
  cpi->fn_ptr[BSIZE].sdf = SDF_FN;                                            \
  cpi->fn_ptr[BSIZE].sdaf = SDAF_FN;                                          \
  cpi->fn_ptr[BSIZE].vf = VF_FN;                                              \
  cpi->fn_ptr[BSIZE].svf = SVF_FN;                                            \
  cpi->fn_ptr[BSIZE].svaf = SVAF_FN;                                          \
  cpi->fn_ptr[BSIZE].sdx4df = SDX4DF_FN;                                      \
  cpi->fn_ptr[BSIZE].sdx8f = NULL;
1577
// Defines three static wrappers around `fnname`, which is called with
// (src, src_stride, ref, ref_stride) and returns unsigned int:
//   fnname_bits8  - returns the result unchanged,
//   fnname_bits10 - returns the result shifted right by 2,
//   fnname_bits12 - returns the result shifted right by 4.
#define MAKE_BFP_SAD_WRAPPER(fnname)                                        \
  static unsigned int fnname##_bits8(const uint8_t *src, int src_stride,    \
                                     const uint8_t *ref, int ref_stride) {  \
    return fnname(src, src_stride, ref, ref_stride);                        \
  }                                                                         \
  static unsigned int fnname##_bits10(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride) { \
    return fnname(src, src_stride, ref, ref_stride) >> 2;                   \
  }                                                                         \
  static unsigned int fnname##_bits12(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride) { \
    return fnname(src, src_stride, ref, ref_stride) >> 4;                   \
  }
1594
// Defines three static wrappers around `fnname`, which is called with
// (src, src_stride, ref, ref_stride, second_pred) and returns unsigned int:
//   fnname_bits8  - returns the result unchanged,
//   fnname_bits10 - returns the result shifted right by 2,
//   fnname_bits12 - returns the result shifted right by 4.
#define MAKE_BFP_SADAVG_WRAPPER(fnname)                                      \
  static unsigned int fnname##_bits8(const uint8_t *src, int src_stride,     \
                                     const uint8_t *ref, int ref_stride,     \
                                     const uint8_t *second_pred) {           \
    return fnname(src, src_stride, ref, ref_stride, second_pred);            \
  }                                                                          \
  static unsigned int fnname##_bits10(const uint8_t *src, int src_stride,    \
                                      const uint8_t *ref, int ref_stride,    \
                                      const uint8_t *second_pred) {          \
    return fnname(src, src_stride, ref, ref_stride, second_pred) >> 2;       \
  }                                                                          \
  static unsigned int fnname##_bits12(const uint8_t *src, int src_stride,    \
                                      const uint8_t *ref, int ref_stride,    \
                                      const uint8_t *second_pred) {          \
    return fnname(src, src_stride, ref, ref_stride, second_pred) >> 4;       \
  }
1613
// Defines three static wrappers around the high bit-depth 4-reference SAD
// kernel `fnname`, which writes four results into the output array:
//   fnname_bits8  - leaves the four results unchanged,
//   fnname_bits10 - shifts each of the four results right by 2,
//   fnname_bits12 - shifts each of the four results right by 4.
#define MAKE_BFP_SAD4D_WRAPPER(fnname)                                        \
  static void fnname##_bits8(const uint8_t *src, int src_stride,              \
                             const uint8_t *const refs[], int ref_stride,     \
                             unsigned int *sads) {                            \
    fnname(src, src_stride, refs, ref_stride, sads);                          \
  }                                                                           \
  static void fnname##_bits10(const uint8_t *src, int src_stride,             \
                              const uint8_t *const refs[], int ref_stride,    \
                              unsigned int *sads) {                           \
    fnname(src, src_stride, refs, ref_stride, sads);                          \
    sads[0] >>= 2;                                                            \
    sads[1] >>= 2;                                                            \
    sads[2] >>= 2;                                                            \
    sads[3] >>= 2;                                                            \
  }                                                                           \
  static void fnname##_bits12(const uint8_t *src, int src_stride,             \
                              const uint8_t *const refs[], int ref_stride,    \
                              unsigned int *sads) {                           \
    fnname(src, src_stride, refs, ref_stride, sads);                          \
    sads[0] >>= 4;                                                            \
    sads[1] >>= 4;                                                            \
    sads[2] >>= 4;                                                            \
    sads[3] >>= 4;                                                            \
  }
1634
1635 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
1636 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
1637 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
1638 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
1639 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
1640 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
1641 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
1642 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
1643 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
1644 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
1645 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
1646 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
1647 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
1648 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
1649 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
1650 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
1651 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
1652 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
1653 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
1654 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
1655 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
1656 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
1657 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
1658 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
1659 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
1660 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
1661 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
1662 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
1663 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
1664 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
1665 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
1666 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
1667 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
1668 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
1669 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
1670 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
1671 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
1672 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
1673 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
1674
1675 static void highbd_set_var_fns(VP9_COMP *const cpi) {
1676   VP9_COMMON *const cm = &cpi->common;
1677   if (cm->use_highbitdepth) {
1678     switch (cm->bit_depth) {
1679       case VPX_BITS_8:
1680         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
1681                    vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
1682                    vpx_highbd_8_sub_pixel_variance32x16,
1683                    vpx_highbd_8_sub_pixel_avg_variance32x16,
1684                    vpx_highbd_sad32x16x4d_bits8)
1685
1686         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
1687                    vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
1688                    vpx_highbd_8_sub_pixel_variance16x32,
1689                    vpx_highbd_8_sub_pixel_avg_variance16x32,
1690                    vpx_highbd_sad16x32x4d_bits8)
1691
1692         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
1693                    vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
1694                    vpx_highbd_8_sub_pixel_variance64x32,
1695                    vpx_highbd_8_sub_pixel_avg_variance64x32,
1696                    vpx_highbd_sad64x32x4d_bits8)
1697
1698         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
1699                    vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
1700                    vpx_highbd_8_sub_pixel_variance32x64,
1701                    vpx_highbd_8_sub_pixel_avg_variance32x64,
1702                    vpx_highbd_sad32x64x4d_bits8)
1703
1704         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
1705                    vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
1706                    vpx_highbd_8_sub_pixel_variance32x32,
1707                    vpx_highbd_8_sub_pixel_avg_variance32x32,
1708                    vpx_highbd_sad32x32x4d_bits8)
1709
1710         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
1711                    vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
1712                    vpx_highbd_8_sub_pixel_variance64x64,
1713                    vpx_highbd_8_sub_pixel_avg_variance64x64,
1714                    vpx_highbd_sad64x64x4d_bits8)
1715
1716         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
1717                    vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
1718                    vpx_highbd_8_sub_pixel_variance16x16,
1719                    vpx_highbd_8_sub_pixel_avg_variance16x16,
1720                    vpx_highbd_sad16x16x4d_bits8)
1721
1722         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
1723                    vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
1724                    vpx_highbd_8_sub_pixel_variance16x8,
1725                    vpx_highbd_8_sub_pixel_avg_variance16x8,
1726                    vpx_highbd_sad16x8x4d_bits8)
1727
1728         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
1729                    vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
1730                    vpx_highbd_8_sub_pixel_variance8x16,
1731                    vpx_highbd_8_sub_pixel_avg_variance8x16,
1732                    vpx_highbd_sad8x16x4d_bits8)
1733
1734         HIGHBD_BFP(
1735             BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
1736             vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
1737             vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
1738
1739         HIGHBD_BFP(
1740             BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
1741             vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
1742             vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
1743
1744         HIGHBD_BFP(
1745             BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
1746             vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
1747             vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
1748
1749         HIGHBD_BFP(
1750             BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
1751             vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
1752             vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
1753         break;
1754
1755       case VPX_BITS_10:
1756         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
1757                    vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
1758                    vpx_highbd_10_sub_pixel_variance32x16,
1759                    vpx_highbd_10_sub_pixel_avg_variance32x16,
1760                    vpx_highbd_sad32x16x4d_bits10)
1761
1762         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
1763                    vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
1764                    vpx_highbd_10_sub_pixel_variance16x32,
1765                    vpx_highbd_10_sub_pixel_avg_variance16x32,
1766                    vpx_highbd_sad16x32x4d_bits10)
1767
1768         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
1769                    vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
1770                    vpx_highbd_10_sub_pixel_variance64x32,
1771                    vpx_highbd_10_sub_pixel_avg_variance64x32,
1772                    vpx_highbd_sad64x32x4d_bits10)
1773
1774         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
1775                    vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
1776                    vpx_highbd_10_sub_pixel_variance32x64,
1777                    vpx_highbd_10_sub_pixel_avg_variance32x64,
1778                    vpx_highbd_sad32x64x4d_bits10)
1779
1780         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
1781                    vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
1782                    vpx_highbd_10_sub_pixel_variance32x32,
1783                    vpx_highbd_10_sub_pixel_avg_variance32x32,
1784                    vpx_highbd_sad32x32x4d_bits10)
1785
1786         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
1787                    vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
1788                    vpx_highbd_10_sub_pixel_variance64x64,
1789                    vpx_highbd_10_sub_pixel_avg_variance64x64,
1790                    vpx_highbd_sad64x64x4d_bits10)
1791
1792         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
1793                    vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
1794                    vpx_highbd_10_sub_pixel_variance16x16,
1795                    vpx_highbd_10_sub_pixel_avg_variance16x16,
1796                    vpx_highbd_sad16x16x4d_bits10)
1797
1798         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
1799                    vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
1800                    vpx_highbd_10_sub_pixel_variance16x8,
1801                    vpx_highbd_10_sub_pixel_avg_variance16x8,
1802                    vpx_highbd_sad16x8x4d_bits10)
1803
1804         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
1805                    vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
1806                    vpx_highbd_10_sub_pixel_variance8x16,
1807                    vpx_highbd_10_sub_pixel_avg_variance8x16,
1808                    vpx_highbd_sad8x16x4d_bits10)
1809
1810         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
1811                    vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
1812                    vpx_highbd_10_sub_pixel_variance8x8,
1813                    vpx_highbd_10_sub_pixel_avg_variance8x8,
1814                    vpx_highbd_sad8x8x4d_bits10)
1815
1816         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
1817                    vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
1818                    vpx_highbd_10_sub_pixel_variance8x4,
1819                    vpx_highbd_10_sub_pixel_avg_variance8x4,
1820                    vpx_highbd_sad8x4x4d_bits10)
1821
1822         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
1823                    vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
1824                    vpx_highbd_10_sub_pixel_variance4x8,
1825                    vpx_highbd_10_sub_pixel_avg_variance4x8,
1826                    vpx_highbd_sad4x8x4d_bits10)
1827
1828         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
1829                    vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
1830                    vpx_highbd_10_sub_pixel_variance4x4,
1831                    vpx_highbd_10_sub_pixel_avg_variance4x4,
1832                    vpx_highbd_sad4x4x4d_bits10)
1833         break;
1834
1835       default:
1836         assert(cm->bit_depth == VPX_BITS_12);
1837         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
1838                    vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
1839                    vpx_highbd_12_sub_pixel_variance32x16,
1840                    vpx_highbd_12_sub_pixel_avg_variance32x16,
1841                    vpx_highbd_sad32x16x4d_bits12)
1842
1843         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
1844                    vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
1845                    vpx_highbd_12_sub_pixel_variance16x32,
1846                    vpx_highbd_12_sub_pixel_avg_variance16x32,
1847                    vpx_highbd_sad16x32x4d_bits12)
1848
1849         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
1850                    vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
1851                    vpx_highbd_12_sub_pixel_variance64x32,
1852                    vpx_highbd_12_sub_pixel_avg_variance64x32,
1853                    vpx_highbd_sad64x32x4d_bits12)
1854
1855         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
1856                    vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
1857                    vpx_highbd_12_sub_pixel_variance32x64,
1858                    vpx_highbd_12_sub_pixel_avg_variance32x64,
1859                    vpx_highbd_sad32x64x4d_bits12)
1860
1861         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
1862                    vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
1863                    vpx_highbd_12_sub_pixel_variance32x32,
1864                    vpx_highbd_12_sub_pixel_avg_variance32x32,
1865                    vpx_highbd_sad32x32x4d_bits12)
1866
1867         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
1868                    vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
1869                    vpx_highbd_12_sub_pixel_variance64x64,
1870                    vpx_highbd_12_sub_pixel_avg_variance64x64,
1871                    vpx_highbd_sad64x64x4d_bits12)
1872
1873         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
1874                    vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
1875                    vpx_highbd_12_sub_pixel_variance16x16,
1876                    vpx_highbd_12_sub_pixel_avg_variance16x16,
1877                    vpx_highbd_sad16x16x4d_bits12)
1878
1879         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
1880                    vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
1881                    vpx_highbd_12_sub_pixel_variance16x8,
1882                    vpx_highbd_12_sub_pixel_avg_variance16x8,
1883                    vpx_highbd_sad16x8x4d_bits12)
1884
1885         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
1886                    vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
1887                    vpx_highbd_12_sub_pixel_variance8x16,
1888                    vpx_highbd_12_sub_pixel_avg_variance8x16,
1889                    vpx_highbd_sad8x16x4d_bits12)
1890
1891         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
1892                    vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
1893                    vpx_highbd_12_sub_pixel_variance8x8,
1894                    vpx_highbd_12_sub_pixel_avg_variance8x8,
1895                    vpx_highbd_sad8x8x4d_bits12)
1896
1897         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
1898                    vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
1899                    vpx_highbd_12_sub_pixel_variance8x4,
1900                    vpx_highbd_12_sub_pixel_avg_variance8x4,
1901                    vpx_highbd_sad8x4x4d_bits12)
1902
1903         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
1904                    vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
1905                    vpx_highbd_12_sub_pixel_variance4x8,
1906                    vpx_highbd_12_sub_pixel_avg_variance4x8,
1907                    vpx_highbd_sad4x8x4d_bits12)
1908
1909         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
1910                    vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
1911                    vpx_highbd_12_sub_pixel_variance4x4,
1912                    vpx_highbd_12_sub_pixel_avg_variance4x4,
1913                    vpx_highbd_sad4x4x4d_bits12)
1914         break;
1915     }
1916   }
1917 }
1918 #endif  // CONFIG_VP9_HIGHBITDEPTH
1919
1920 static void realloc_segmentation_maps(VP9_COMP *cpi) {
1921   VP9_COMMON *const cm = &cpi->common;
1922
1923   // Create the encoder segmentation map and set all entries to 0
1924   vpx_free(cpi->segmentation_map);
1925   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
1926                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1927
1928   // Create a map used for cyclic background refresh.
1929   if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1930   CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
1931                   vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
1932
1933   // Create a map used to mark inactive areas.
1934   vpx_free(cpi->active_map.map);
1935   CHECK_MEM_ERROR(cm, cpi->active_map.map,
1936                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1937
1938   // And a place holder structure is the coding context
1939   // for use if we want to save and restore it
1940   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
1941   CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
1942                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1943 }
1944
1945 static void alloc_copy_partition_data(VP9_COMP *cpi) {
1946   VP9_COMMON *const cm = &cpi->common;
1947   if (cpi->prev_partition == NULL) {
1948     CHECK_MEM_ERROR(cm, cpi->prev_partition,
1949                     (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
1950                                              sizeof(*cpi->prev_partition)));
1951   }
1952   if (cpi->prev_segment_id == NULL) {
1953     CHECK_MEM_ERROR(
1954         cm, cpi->prev_segment_id,
1955         (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1956                              sizeof(*cpi->prev_segment_id)));
1957   }
1958   if (cpi->prev_variance_low == NULL) {
1959     CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
1960                     (uint8_t *)vpx_calloc(
1961                         (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
1962                         sizeof(*cpi->prev_variance_low)));
1963   }
1964   if (cpi->copied_frame_cnt == NULL) {
1965     CHECK_MEM_ERROR(
1966         cm, cpi->copied_frame_cnt,
1967         (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1968                               sizeof(*cpi->copied_frame_cnt)));
1969   }
1970 }
1971
1972 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1973   VP9_COMMON *const cm = &cpi->common;
1974   RATE_CONTROL *const rc = &cpi->rc;
1975   int last_w = cpi->oxcf.width;
1976   int last_h = cpi->oxcf.height;
1977
1978   vp9_init_quantizer(cpi);
1979   if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
1980   cm->bit_depth = oxcf->bit_depth;
1981   cm->color_space = oxcf->color_space;
1982   cm->color_range = oxcf->color_range;
1983
1984   cpi->target_level = oxcf->target_level;
1985   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1986   set_level_constraint(&cpi->level_constraint,
1987                        get_level_index(cpi->target_level));
1988
1989   if (cm->profile <= PROFILE_1)
1990     assert(cm->bit_depth == VPX_BITS_8);
1991   else
1992     assert(cm->bit_depth > VPX_BITS_8);
1993
1994   cpi->oxcf = *oxcf;
1995 #if CONFIG_VP9_HIGHBITDEPTH
1996   cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
1997 #endif  // CONFIG_VP9_HIGHBITDEPTH
1998
1999   if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
2000     rc->baseline_gf_interval = FIXED_GF_INTERVAL;
2001   } else {
2002     rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
2003   }
2004
2005   cpi->refresh_golden_frame = 0;
2006   cpi->refresh_last_frame = 1;
2007   cm->refresh_frame_context = 1;
2008   cm->reset_frame_context = 0;
2009
2010   vp9_reset_segment_features(&cm->seg);
2011   vp9_set_high_precision_mv(cpi, 0);
2012
2013   {
2014     int i;
2015
2016     for (i = 0; i < MAX_SEGMENTS; i++)
2017       cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
2018   }
2019   cpi->encode_breakout = cpi->oxcf.encode_breakout;
2020
2021   vp9_set_rc_buffer_sizes(cpi);
2022
2023   // Set up frame rate and related parameters rate control values.
2024   vp9_new_framerate(cpi, cpi->framerate);
2025
2026   // Set absolute upper and lower quality limits
2027   rc->worst_quality = cpi->oxcf.worst_allowed_q;
2028   rc->best_quality = cpi->oxcf.best_allowed_q;
2029
2030   cm->interp_filter = cpi->sf.default_interp_filter;
2031
2032   if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
2033     cm->render_width = cpi->oxcf.render_width;
2034     cm->render_height = cpi->oxcf.render_height;
2035   } else {
2036     cm->render_width = cpi->oxcf.width;
2037     cm->render_height = cpi->oxcf.height;
2038   }
2039   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2040     cm->width = cpi->oxcf.width;
2041     cm->height = cpi->oxcf.height;
2042     cpi->external_resize = 1;
2043   }
2044
2045   if (cpi->initial_width) {
2046     int new_mi_size = 0;
2047     vp9_set_mb_mi(cm, cm->width, cm->height);
2048     new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
2049     if (cm->mi_alloc_size < new_mi_size) {
2050       vp9_free_context_buffers(cm);
2051       alloc_compressor_data(cpi);
2052       realloc_segmentation_maps(cpi);
2053       cpi->initial_width = cpi->initial_height = 0;
2054       cpi->external_resize = 0;
2055     } else if (cm->mi_alloc_size == new_mi_size &&
2056                (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
2057       vp9_alloc_loop_filter(cm);
2058     }
2059   }
2060
2061   if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width ||
2062       last_h != cpi->oxcf.height)
2063     update_frame_size(cpi);
2064
2065   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2066     memset(cpi->consec_zero_mv, 0,
2067            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
2068     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
2069       vp9_cyclic_refresh_reset_resize(cpi);
2070     rc->rc_1_frame = 0;
2071     rc->rc_2_frame = 0;
2072   }
2073
2074   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
2075       ((cpi->svc.number_temporal_layers > 1 ||
2076         cpi->svc.number_spatial_layers > 1) &&
2077        cpi->oxcf.pass != 1)) {
2078     vp9_update_layer_context_change_config(cpi,
2079                                            (int)cpi->oxcf.target_bandwidth);
2080   }
2081
2082   vp9_check_reset_rc_flag(cpi);
2083
2084   cpi->alt_ref_source = NULL;
2085   rc->is_src_frame_alt_ref = 0;
2086
2087 #if 0
2088   // Experimental RD Code
2089   cpi->frame_distortion = 0;
2090   cpi->last_frame_distortion = 0;
2091 #endif
2092
2093   set_tile_limits(cpi);
2094
2095   cpi->ext_refresh_frame_flags_pending = 0;
2096   cpi->ext_refresh_frame_context_pending = 0;
2097
2098 #if CONFIG_VP9_HIGHBITDEPTH
2099   highbd_set_var_fns(cpi);
2100 #endif
2101
2102   vp9_set_row_mt(cpi);
2103 }
2104
// NOTE: despite its name, M_LOG2_E holds ln(2) (~0.6931), not log2(e).
#ifndef M_LOG2_E
#define M_LOG2_E 0.693147180559945309417
#endif
// Local base-2 logarithm: natural log of x divided by ln(2), with the
// divisor narrowed to float.
#define log2f(x) (log(x) / (float)M_LOG2_E)
2109
2110 /***********************************************************************
2111  * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts'    *
2112  ***********************************************************************
2113  * The following 2 functions ('cal_nmvjointsadcost' and                *
2114  * 'cal_nmvsadcosts') are used to calculate cost lookup tables         *
2115  * used by 'vp9_diamond_search_sad'. The C implementation of the       *
2116  * function is generic, but the AVX intrinsics optimised version       *
2117  * relies on the following properties of the computed tables:          *
2118  * For cal_nmvjointsadcost:                                            *
2119  *   - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]     *
2120  * For cal_nmvsadcosts:                                                *
2121  *   - For all i: mvsadcost[0][i] == mvsadcost[1][i]                   *
2122  *         (Equal costs for both components)                           *
2123  *   - For all i: mvsadcost[0][i] == mvsadcost[0][-i]                  *
2124  *         (Cost function is even)                                     *
2125  * If these do not hold, then the AVX optimised version of the         *
2126  * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
2127  * case you can revert to using the C function instead.                *
2128  ***********************************************************************/
2129
// Fills the four-entry joint SAD cost table: entry 0 is 600 and entries
// 1..3 are all 300.
static void cal_nmvjointsadcost(int *mvjointsadcost) {
  /*********************************************************************
   * Warning: Read the comments above before modifying this function   *
   *********************************************************************/
  static const int joint_costs[4] = { 600, 300, 300, 300 };
  int joint;

  for (joint = 0; joint < 4; ++joint) {
    mvjointsadcost[joint] = joint_costs[joint];
  }
}
2139
2140 static void cal_nmvsadcosts(int *mvsadcost[2]) {
2141   /*********************************************************************
2142    * Warning: Read the comments above before modifying this function   *
2143    *********************************************************************/
2144   int i = 1;
2145
2146   mvsadcost[0][0] = 0;
2147   mvsadcost[1][0] = 0;
2148
2149   do {
2150     double z = 256 * (2 * (log2f(8 * i) + .6));
2151     mvsadcost[0][i] = (int)z;
2152     mvsadcost[1][i] = (int)z;
2153     mvsadcost[0][-i] = (int)z;
2154     mvsadcost[1][-i] = (int)z;
2155   } while (++i <= MV_MAX);
2156 }
2157
2158 static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
2159   int i = 1;
2160
2161   mvsadcost[0][0] = 0;
2162   mvsadcost[1][0] = 0;
2163
2164   do {
2165     double z = 256 * (2 * (log2f(8 * i) + .6));
2166     mvsadcost[0][i] = (int)z;
2167     mvsadcost[1][i] = (int)z;
2168     mvsadcost[0][-i] = (int)z;
2169     mvsadcost[1][-i] = (int)z;
2170   } while (++i <= MV_MAX);
2171 }
2172
2173 static void init_ref_frame_bufs(VP9_COMMON *cm) {
2174   int i;
2175   BufferPool *const pool = cm->buffer_pool;
2176   cm->new_fb_idx = INVALID_IDX;
2177   for (i = 0; i < REF_FRAMES; ++i) {
2178     cm->ref_frame_map[i] = INVALID_IDX;
2179   }
2180   for (i = 0; i < FRAME_BUFFERS; ++i) {
2181     pool->frame_bufs[i].ref_count = 0;
2182   }
2183 }
2184
2185 static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
2186                                  int subsampling_x, int subsampling_y) {
2187   VP9_COMMON *const cm = &cpi->common;
2188 #if !CONFIG_VP9_HIGHBITDEPTH
2189   (void)use_highbitdepth;
2190   assert(use_highbitdepth == 0);
2191 #endif
2192
2193   if (!cpi->initial_width ||
2194 #if CONFIG_VP9_HIGHBITDEPTH
2195       cm->use_highbitdepth != use_highbitdepth ||
2196 #endif
2197       cm->subsampling_x != subsampling_x ||
2198       cm->subsampling_y != subsampling_y) {
2199     cm->subsampling_x = subsampling_x;
2200     cm->subsampling_y = subsampling_y;
2201 #if CONFIG_VP9_HIGHBITDEPTH
2202     cm->use_highbitdepth = use_highbitdepth;
2203 #endif
2204     alloc_util_frame_buffers(cpi);
2205     cpi->initial_width = cm->width;
2206     cpi->initial_height = cm->height;
2207     cpi->initial_mbs = cm->MBs;
2208   }
2209 }
2210
2211 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2212 static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
2213                                               unsigned int *subsampling_x,
2214                                               unsigned int *subsampling_y) {
2215   switch (fmt) {
2216     case VPX_IMG_FMT_I420:
2217     case VPX_IMG_FMT_YV12:
2218     case VPX_IMG_FMT_I422:
2219     case VPX_IMG_FMT_I42016:
2220     case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
2221     default: *subsampling_x = 0; break;
2222   }
2223
2224   switch (fmt) {
2225     case VPX_IMG_FMT_I420:
2226     case VPX_IMG_FMT_I440:
2227     case VPX_IMG_FMT_YV12:
2228     case VPX_IMG_FMT_I42016:
2229     case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
2230     default: *subsampling_y = 0; break;
2231   }
2232 }
2233
2234 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2235 static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
2236   return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
2237 }
2238
#if CONFIG_VP9_TEMPORAL_DENOISING
// Allocates the temporal denoiser's buffers the first time they are needed:
// only when noise sensitivity is enabled and the denoiser frame buffers have
// not been initialised yet. A failed allocation is raised as a
// VPX_CODEC_MEM_ERROR through vpx_internal_error().
static void setup_denoiser_buffer(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

  if (!(cpi->oxcf.noise_sensitivity > 0)) return;
  if (cpi->denoiser.frame_buffer_initialized) return;

  if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
                         cpi->oxcf.noise_sensitivity, cm->width, cm->height,
                         cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                         cm->use_highbitdepth,
#endif
                         VP9_ENC_BORDER_IN_PIXELS)) {
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate denoiser");
  }
}
#endif
2256
2257 void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
2258   const VP9EncoderConfig *oxcf = &cpi->oxcf;
2259   unsigned int subsampling_x, subsampling_y;
2260   const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
2261   vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
2262
2263   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
2264 #if CONFIG_VP9_TEMPORAL_DENOISING
2265   setup_denoiser_buffer(cpi);
2266 #endif
2267
2268   assert(cpi->lookahead == NULL);
2269   cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
2270                                       subsampling_y,
2271 #if CONFIG_VP9_HIGHBITDEPTH
2272                                       use_highbitdepth,
2273 #endif
2274                                       oxcf->lag_in_frames);
2275   alloc_raw_frame_buffers(cpi);
2276 }
2277
2278 VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
2279                                 BufferPool *const pool) {
2280   unsigned int i;
2281   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
2282   VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
2283
2284   if (!cm) return NULL;
2285
2286   vp9_zero(*cpi);
2287
2288   if (setjmp(cm->error.jmp)) {
2289     cm->error.setjmp = 0;
2290     vp9_remove_compressor(cpi);
2291     return 0;
2292   }
2293
2294   cm->error.setjmp = 1;
2295   cm->alloc_mi = vp9_enc_alloc_mi;
2296   cm->free_mi = vp9_enc_free_mi;
2297   cm->setup_mi = vp9_enc_setup_mi;
2298
2299   CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
2300   CHECK_MEM_ERROR(
2301       cm, cm->frame_contexts,
2302       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
2303
2304   cpi->use_svc = 0;
2305   cpi->resize_state = ORIG;
2306   cpi->external_resize = 0;
2307   cpi->resize_avg_qp = 0;
2308   cpi->resize_buffer_underflow = 0;
2309   cpi->use_skin_detection = 0;
2310   cpi->common.buffer_pool = pool;
2311   init_ref_frame_bufs(cm);
2312
2313   cpi->force_update_segmentation = 0;
2314
2315   init_config(cpi, oxcf);
2316   cpi->frame_info = vp9_get_frame_info(oxcf);
2317
2318   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
2319
2320   init_frame_indexes(cm);
2321   cpi->partition_search_skippable_frame = 0;
2322   cpi->tile_data = NULL;
2323
2324   realloc_segmentation_maps(cpi);
2325
2326   CHECK_MEM_ERROR(
2327       cm, cpi->skin_map,
2328       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
2329
2330 #if !CONFIG_REALTIME_ONLY
2331   CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
2332 #endif
2333
2334   CHECK_MEM_ERROR(
2335       cm, cpi->consec_zero_mv,
2336       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
2337
2338   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
2339                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
2340   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
2341                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
2342   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
2343                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
2344   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
2345                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
2346   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
2347                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
2348   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
2349                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
2350   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
2351                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
2352   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
2353                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
2354
2355   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
2356        i++) {
2357     CHECK_MEM_ERROR(
2358         cm, cpi->mbgraph_stats[i].mb_stats,
2359         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
2360   }
2361
2362 #if CONFIG_FP_MB_STATS
2363   cpi->use_fp_mb_stats = 0;
2364   if (cpi->use_fp_mb_stats) {
2365     // a place holder used to store the first pass mb stats in the first pass
2366     CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
2367                     vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
2368   } else {
2369     cpi->twopass.frame_mb_stats_buf = NULL;
2370   }
2371 #endif
2372
2373   cpi->refresh_alt_ref_frame = 0;
2374   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
2375
2376   init_level_info(&cpi->level_info);
2377   init_level_constraint(&cpi->level_constraint);
2378
2379 #if CONFIG_INTERNAL_STATS
2380   cpi->b_calculate_blockiness = 1;
2381   cpi->b_calculate_consistency = 1;
2382   cpi->total_inconsistency = 0;
2383   cpi->psnr.worst = 100.0;
2384   cpi->worst_ssim = 100.0;
2385
2386   cpi->count = 0;
2387   cpi->bytes = 0;
2388
2389   if (cpi->b_calculate_psnr) {
2390     cpi->total_sq_error = 0;
2391     cpi->total_samples = 0;
2392
2393     cpi->totalp_sq_error = 0;
2394     cpi->totalp_samples = 0;
2395
2396     cpi->tot_recode_hits = 0;
2397     cpi->summed_quality = 0;
2398     cpi->summed_weights = 0;
2399     cpi->summedp_quality = 0;
2400     cpi->summedp_weights = 0;
2401   }
2402
2403   cpi->fastssim.worst = 100.0;
2404
2405   cpi->psnrhvs.worst = 100.0;
2406
2407   if (cpi->b_calculate_blockiness) {
2408     cpi->total_blockiness = 0;
2409     cpi->worst_blockiness = 0.0;
2410   }
2411
2412   if (cpi->b_calculate_consistency) {
2413     CHECK_MEM_ERROR(cm, cpi->ssim_vars,
2414                     vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
2415                                sizeof(*cpi->ssim_vars) * 4));
2416     cpi->worst_consistency = 100.0;
2417   } else {
2418     cpi->ssim_vars = NULL;
2419   }
2420
2421 #endif
2422
2423   cpi->first_time_stamp_ever = INT64_MAX;
2424
2425   /*********************************************************************
2426    * Warning: Read the comments around 'cal_nmvjointsadcost' and       *
2427    * 'cal_nmvsadcosts' before modifying how these tables are computed. *
2428    *********************************************************************/
2429   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
2430   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
2431   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
2432   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
2433   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
2434   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
2435
2436   cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
2437   cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
2438   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
2439   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
2440   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
2441
2442 #if CONFIG_VP9_TEMPORAL_DENOISING
2443 #ifdef OUTPUT_YUV_DENOISED
2444   yuv_denoised_file = fopen("denoised.yuv", "ab");
2445 #endif
2446 #endif
2447 #ifdef OUTPUT_YUV_SKINMAP
2448   yuv_skinmap_file = fopen("skinmap.yuv", "wb");
2449 #endif
2450 #ifdef OUTPUT_YUV_REC
2451   yuv_rec_file = fopen("rec.yuv", "wb");
2452 #endif
2453 #ifdef OUTPUT_YUV_SVC_SRC
2454   yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
2455   yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
2456   yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
2457 #endif
2458
2459 #if 0
2460   framepsnr = fopen("framepsnr.stt", "a");
2461   kf_list = fopen("kf_list.stt", "w");
2462 #endif
2463
2464   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
2465
2466 #if !CONFIG_REALTIME_ONLY
2467   if (oxcf->pass == 1) {
2468     vp9_init_first_pass(cpi);
2469   } else if (oxcf->pass == 2) {
2470     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
2471     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
2472
2473     if (cpi->svc.number_spatial_layers > 1 ||
2474         cpi->svc.number_temporal_layers > 1) {
2475       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
2476       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 };
2477       int i;
2478
2479       for (i = 0; i < oxcf->ss_number_layers; ++i) {
2480         FIRSTPASS_STATS *const last_packet_for_layer =
2481             &stats[packets - oxcf->ss_number_layers + i];
2482         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
2483         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
2484         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
2485           int num_frames;
2486           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
2487
2488           vpx_free(lc->rc_twopass_stats_in.buf);
2489
2490           lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
2491           CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
2492                           vpx_malloc(lc->rc_twopass_stats_in.sz));
2493           lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
2494           lc->twopass.stats_in = lc->twopass.stats_in_start;
2495           lc->twopass.stats_in_end =
2496               lc->twopass.stats_in_start + packets_in_layer - 1;
2497           // Note the last packet is cumulative first pass stats.
2498           // So the number of frames is packet number minus one
2499           num_frames = packets_in_layer - 1;
2500           fps_init_first_pass_info(&lc->twopass.first_pass_info,
2501                                    lc->rc_twopass_stats_in.buf, num_frames);
2502           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
2503         }
2504       }
2505
2506       for (i = 0; i < packets; ++i) {
2507         const int layer_id = (int)stats[i].spatial_layer_id;
2508         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers &&
2509             stats_copy[layer_id] != NULL) {
2510           *stats_copy[layer_id] = stats[i];
2511           ++stats_copy[layer_id];
2512         }
2513       }
2514
2515       vp9_init_second_pass_spatial_svc(cpi);
2516     } else {
2517       int num_frames;
2518 #if CONFIG_FP_MB_STATS
2519       if (cpi->use_fp_mb_stats) {
2520         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
2521         const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
2522
2523         cpi->twopass.firstpass_mb_stats.mb_stats_start =
2524             oxcf->firstpass_mb_stats_in.buf;
2525         cpi->twopass.firstpass_mb_stats.mb_stats_end =
2526             cpi->twopass.firstpass_mb_stats.mb_stats_start +
2527             (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
2528       }
2529 #endif
2530
2531       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
2532       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
2533       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
2534       // Note the last packet is cumulative first pass stats.
2535       // So the number of frames is packet number minus one
2536       num_frames = packets - 1;
2537       fps_init_first_pass_info(&cpi->twopass.first_pass_info,
2538                                oxcf->two_pass_stats_in.buf, num_frames);
2539
2540       vp9_init_second_pass(cpi);
2541     }
2542   }
2543 #endif  // !CONFIG_REALTIME_ONLY
2544
2545   cpi->mb_wiener_var_cols = 0;
2546   cpi->mb_wiener_var_rows = 0;
2547   cpi->mb_wiener_variance = NULL;
2548
2549   vp9_set_speed_features_framesize_independent(cpi, oxcf->speed);
2550   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
2551
2552   {
2553     const int bsize = BLOCK_16X16;
2554     const int w = num_8x8_blocks_wide_lookup[bsize];
2555     const int h = num_8x8_blocks_high_lookup[bsize];
2556     const int num_cols = (cm->mi_cols + w - 1) / w;
2557     const int num_rows = (cm->mi_rows + h - 1) / h;
2558     CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
2559                     vpx_calloc(num_rows * num_cols,
2560                                sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
2561   }
2562
2563   cpi->kmeans_data_arr_alloc = 0;
2564 #if CONFIG_NON_GREEDY_MV
2565   cpi->tpl_ready = 0;
2566 #endif  // CONFIG_NON_GREEDY_MV
2567   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
2568
2569   // Allocate memory to store variances for a frame.
2570   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
2571   cpi->source_var_thresh = 0;
2572   cpi->frames_till_next_var_check = 0;
2573 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
2574   cpi->fn_ptr[BT].sdf = SDF;                             \
2575   cpi->fn_ptr[BT].sdaf = SDAF;                           \
2576   cpi->fn_ptr[BT].vf = VF;                               \
2577   cpi->fn_ptr[BT].svf = SVF;                             \
2578   cpi->fn_ptr[BT].svaf = SVAF;                           \
2579   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
2580   cpi->fn_ptr[BT].sdx8f = SDX8F;
2581
2582   // TODO(angiebird): make sdx8f available for every block size
2583   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
2584       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
2585       vpx_sad32x16x4d, NULL)
2586
2587   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
2588       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
2589       vpx_sad16x32x4d, NULL)
2590
2591   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
2592       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
2593       vpx_sad64x32x4d, NULL)
2594
2595   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
2596       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
2597       vpx_sad32x64x4d, NULL)
2598
2599   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
2600       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
2601       vpx_sad32x32x4d, vpx_sad32x32x8)
2602
2603   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
2604       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
2605       vpx_sad64x64x4d, NULL)
2606
2607   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
2608       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
2609       vpx_sad16x16x4d, vpx_sad16x16x8)
2610
2611   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
2612       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
2613       vpx_sad16x8x4d, vpx_sad16x8x8)
2614
2615   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
2616       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
2617       vpx_sad8x16x4d, vpx_sad8x16x8)
2618
2619   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
2620       vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
2621       vpx_sad8x8x8)
2622
2623   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
2624       vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
2625       NULL)
2626
2627   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
2628       vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
2629       NULL)
2630
2631   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
2632       vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
2633       vpx_sad4x4x8)
2634
2635 #if CONFIG_VP9_HIGHBITDEPTH
2636   highbd_set_var_fns(cpi);
2637 #endif
2638
2639   /* vp9_init_quantizer() is first called here. Add check in
2640    * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
2641    * called later when needed. This will avoid unnecessary calls of
2642    * vp9_init_quantizer() for every frame.
2643    */
2644   vp9_init_quantizer(cpi);
2645
2646   vp9_loop_filter_init(cm);
2647
2648   // Set up the unit scaling factor used during motion search.
2649 #if CONFIG_VP9_HIGHBITDEPTH
2650   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2651                                     cm->width, cm->height,
2652                                     cm->use_highbitdepth);
2653 #else
2654   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2655                                     cm->width, cm->height);
2656 #endif  // CONFIG_VP9_HIGHBITDEPTH
2657   cpi->td.mb.me_sf = &cpi->me_sf;
2658
2659   cm->error.setjmp = 0;
2660
2661 #if CONFIG_RATE_CTRL
2662   encode_command_init(&cpi->encode_command);
2663   partition_info_init(cpi);
2664   motion_vector_info_init(cpi);
2665   fp_motion_vector_info_init(cpi);
2666 #endif
2667
2668   return cpi;
2669 }
2670
2671 #if CONFIG_INTERNAL_STATS
// Appends the string T to the char array H, truncating to fit. H must be a
// real array (not a pointer) because sizeof(H) is used as its capacity. T is
// emitted through "%s", so a '%' inside T is copied literally instead of being
// interpreted as a conversion specification.
#define SNPRINT(H, T) \
  snprintf((H) + strlen(H), sizeof(H) - strlen(H), "%s", (T))

// Appends V, formatted with the printf-style format T, to the char array H,
// truncating to fit. H must be a real array (see SNPRINT); T must contain
// exactly one conversion that matches the type of V.
#define SNPRINT2(H, T, V) \
  snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
2676 #endif  // CONFIG_INTERNAL_STATS
2677
2678 static void free_tpl_buffer(VP9_COMP *cpi);
2679
2680 void vp9_remove_compressor(VP9_COMP *cpi) {
2681   VP9_COMMON *cm;
2682   unsigned int i;
2683   int t;
2684
2685   if (!cpi) return;
2686
2687 #if CONFIG_INTERNAL_STATS
2688   vpx_free(cpi->ssim_vars);
2689 #endif
2690
2691   cm = &cpi->common;
2692   if (cm->current_video_frame > 0) {
2693 #if CONFIG_INTERNAL_STATS
2694     vpx_clear_system_state();
2695
2696     if (cpi->oxcf.pass != 1) {
2697       char headings[512] = { 0 };
2698       char results[512] = { 0 };
2699       FILE *f = fopen("opsnr.stt", "a");
2700       double time_encoded =
2701           (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
2702           10000000.000;
2703       double total_encode_time =
2704           (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
2705       const double dr =
2706           (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
2707       const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
2708       const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
2709       const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
2710
2711       if (cpi->b_calculate_psnr) {
2712         const double total_psnr = vpx_sse_to_psnr(
2713             (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
2714         const double totalp_psnr = vpx_sse_to_psnr(
2715             (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error);
2716         const double total_ssim =
2717             100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
2718         const double totalp_ssim =
2719             100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0);
2720
2721         snprintf(headings, sizeof(headings),
2722                  "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
2723                  "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
2724                  "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
2725                  "AVPsnrY\tAPsnrCb\tAPsnrCr");
2726         snprintf(results, sizeof(results),
2727                  "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2728                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2729                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2730                  "%7.3f\t%7.3f\t%7.3f",
2731                  dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
2732                  cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim,
2733                  totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count,
2734                  cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
2735                  cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
2736                  cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count,
2737                  cpi->psnr.stat[V] / cpi->count);
2738
2739         if (cpi->b_calculate_blockiness) {
2740           SNPRINT(headings, "\t  Block\tWstBlck");
2741           SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
2742           SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
2743         }
2744
2745         if (cpi->b_calculate_consistency) {
2746           double consistency =
2747               vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
2748                               (double)cpi->total_inconsistency);
2749
2750           SNPRINT(headings, "\tConsist\tWstCons");
2751           SNPRINT2(results, "\t%7.3f", consistency);
2752           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
2753         }
2754
2755         SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
2756         SNPRINT2(results, "\t%8.0f", total_encode_time);
2757         SNPRINT2(results, "\t%7.2f", rate_err);
2758         SNPRINT2(results, "\t%7.2f", fabs(rate_err));
2759
2760         fprintf(f, "%s\tAPsnr611\n", headings);
2761         fprintf(
2762             f, "%s\t%7.3f\n", results,
2763             (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
2764                 (cpi->count * 8));
2765       }
2766
2767       fclose(f);
2768     }
2769 #endif
2770
2771 #if 0
2772     {
2773       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
2774       printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
2775       printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
2776              cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
2777              cpi->time_compress_data / 1000,
2778              (cpi->time_receive_data + cpi->time_compress_data) / 1000);
2779     }
2780 #endif
2781   }
2782
2783 #if CONFIG_VP9_TEMPORAL_DENOISING
2784   vp9_denoiser_free(&(cpi->denoiser));
2785 #endif
2786
2787   if (cpi->kmeans_data_arr_alloc) {
2788 #if CONFIG_MULTITHREAD
2789     pthread_mutex_destroy(&cpi->kmeans_mutex);
2790 #endif
2791     vpx_free(cpi->kmeans_data_arr);
2792   }
2793
2794   free_tpl_buffer(cpi);
2795
2796   for (t = 0; t < cpi->num_workers; ++t) {
2797     VPxWorker *const worker = &cpi->workers[t];
2798     EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
2799
2800     // Deallocate allocated threads.
2801     vpx_get_worker_interface()->end(worker);
2802
2803     // Deallocate allocated thread data.
2804     if (t < cpi->num_workers - 1) {
2805       vpx_free(thread_data->td->counts);
2806       vp9_free_pc_tree(thread_data->td);
2807       vpx_free(thread_data->td);
2808     }
2809   }
2810   vpx_free(cpi->tile_thr_data);
2811   vpx_free(cpi->workers);
2812   vp9_row_mt_mem_dealloc(cpi);
2813
2814   if (cpi->num_workers > 1) {
2815     vp9_loop_filter_dealloc(&cpi->lf_row_sync);
2816     vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
2817   }
2818
2819 #if !CONFIG_REALTIME_ONLY
2820   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
2821 #endif
2822
2823   dealloc_compressor_data(cpi);
2824
2825   for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
2826        ++i) {
2827     vpx_free(cpi->mbgraph_stats[i].mb_stats);
2828   }
2829
2830 #if CONFIG_FP_MB_STATS
2831   if (cpi->use_fp_mb_stats) {
2832     vpx_free(cpi->twopass.frame_mb_stats_buf);
2833     cpi->twopass.frame_mb_stats_buf = NULL;
2834   }
2835 #endif
2836
2837   vp9_remove_common(cm);
2838   vp9_free_ref_frame_buffers(cm->buffer_pool);
2839 #if CONFIG_VP9_POSTPROC
2840   vp9_free_postproc_buffers(cm);
2841 #endif
2842   vpx_free(cpi);
2843
2844 #if CONFIG_VP9_TEMPORAL_DENOISING
2845 #ifdef OUTPUT_YUV_DENOISED
2846   fclose(yuv_denoised_file);
2847 #endif
2848 #endif
2849 #ifdef OUTPUT_YUV_SKINMAP
2850   fclose(yuv_skinmap_file);
2851 #endif
2852 #ifdef OUTPUT_YUV_REC
2853   fclose(yuv_rec_file);
2854 #endif
2855 #ifdef OUTPUT_YUV_SVC_SRC
2856   fclose(yuv_svc_src[0]);
2857   fclose(yuv_svc_src[1]);
2858   fclose(yuv_svc_src[2]);
2859 #endif
2860
2861 #if 0
2862
2863   if (keyfile)
2864     fclose(keyfile);
2865
2866   if (framepsnr)
2867     fclose(framepsnr);
2868
2869   if (kf_list)
2870     fclose(kf_list);
2871
2872 #endif
2873 }
2874
2875 int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
2876   if (is_psnr_calc_enabled(cpi)) {
2877 #if CONFIG_VP9_HIGHBITDEPTH
2878     vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
2879                          cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
2880 #else
2881     vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
2882 #endif
2883     return 1;
2884   } else {
2885     vp9_zero(*psnr);
2886     return 0;
2887   }
2888 }
2889
2890 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
2891   if (ref_frame_flags > 7) return -1;
2892
2893   cpi->ref_frame_flags = ref_frame_flags;
2894   return 0;
2895 }
2896
2897 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
2898   cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
2899   cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
2900   cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
2901   cpi->ext_refresh_frame_flags_pending = 1;
2902 }
2903
2904 static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
2905     VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
2906   MV_REFERENCE_FRAME ref_frame = NONE;
2907   if (ref_frame_flag == VP9_LAST_FLAG)
2908     ref_frame = LAST_FRAME;
2909   else if (ref_frame_flag == VP9_GOLD_FLAG)
2910     ref_frame = GOLDEN_FRAME;
2911   else if (ref_frame_flag == VP9_ALT_FLAG)
2912     ref_frame = ALTREF_FRAME;
2913
2914   return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
2915 }
2916
2917 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2918                            YV12_BUFFER_CONFIG *sd) {
2919   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2920   if (cfg) {
2921     vpx_yv12_copy_frame(cfg, sd);
2922     return 0;
2923   } else {
2924     return -1;
2925   }
2926 }
2927
2928 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2929                           YV12_BUFFER_CONFIG *sd) {
2930   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2931   if (cfg) {
2932     vpx_yv12_copy_frame(sd, cfg);
2933     return 0;
2934   } else {
2935     return -1;
2936   }
2937 }
2938
2939 int vp9_update_entropy(VP9_COMP *cpi, int update) {
2940   cpi->ext_refresh_frame_context = update;
2941   cpi->ext_refresh_frame_context_pending = 1;
2942   return 0;
2943 }
2944
#ifdef OUTPUT_YUV_REC
// Writes |rows| rows of one plane to yuv_rec_file. Each row holds |width|
// samples of |bytes_per_sample| bytes; consecutive rows start |stride|
// samples apart. Nothing is written when |rows| is not positive.
static void write_rec_plane(const uint8_t *plane, int stride, int width,
                            int rows, int bytes_per_sample) {
  int r;
  for (r = 0; r < rows; ++r) {
    fwrite(plane, width, bytes_per_sample, yuv_rec_file);
    plane += stride * bytes_per_sample;
  }
}

// Appends the Y, U and V planes of cm->frame_to_show to yuv_rec_file and
// flushes the stream. The luma plane is written with cm->height rows of
// y_width samples; each chroma plane with uv_height rows of uv_width samples.
// High-bitdepth frames are written as 2-byte samples.
void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
  YV12_BUFFER_CONFIG *s = cm->frame_to_show;

#if CONFIG_VP9_HIGHBITDEPTH
  if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
    write_rec_plane((const uint8_t *)CONVERT_TO_SHORTPTR(s->y_buffer),
                    s->y_stride, s->y_width, cm->height, 2);
    write_rec_plane((const uint8_t *)CONVERT_TO_SHORTPTR(s->u_buffer),
                    s->uv_stride, s->uv_width, s->uv_height, 2);
    write_rec_plane((const uint8_t *)CONVERT_TO_SHORTPTR(s->v_buffer),
                    s->uv_stride, s->uv_width, s->uv_height, 2);

    fflush(yuv_rec_file);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  write_rec_plane(s->y_buffer, s->y_stride, s->y_width, cm->height, 1);
  write_rec_plane(s->u_buffer, s->uv_stride, s->uv_width, s->uv_height, 1);
  write_rec_plane(s->v_buffer, s->uv_stride, s->uv_width, s->uv_height, 1);

  fflush(yuv_rec_file);
}
#endif
3005
3006 #if CONFIG_VP9_HIGHBITDEPTH
3007 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3008                                                 YV12_BUFFER_CONFIG *dst,
3009                                                 int bd) {
3010 #else
3011 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3012                                                 YV12_BUFFER_CONFIG *dst) {
3013 #endif  // CONFIG_VP9_HIGHBITDEPTH
3014   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
3015   int i;
3016   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
3017                                    src->v_buffer };
3018   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
3019   const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
3020                               src->uv_crop_width };
3021   const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
3022                                src->uv_crop_height };
3023   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
3024   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
3025   const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
3026                               dst->uv_crop_width };
3027   const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
3028                                dst->uv_crop_height };
3029
3030   for (i = 0; i < MAX_MB_PLANE; ++i) {
3031 #if CONFIG_VP9_HIGHBITDEPTH
3032     if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
3033       vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
3034                               src_strides[i], dsts[i], dst_heights[i],
3035                               dst_widths[i], dst_strides[i], bd);
3036     } else {
3037       vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3038                        dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3039     }
3040 #else
3041     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3042                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3043 #endif  // CONFIG_VP9_HIGHBITDEPTH
3044   }
3045   vpx_extend_frame_borders(dst);
3046 }
3047
#if CONFIG_VP9_HIGHBITDEPTH
// Scales |src| into |dst| with the interpolation kernel selected by
// |filter_type|, then extends the borders of |dst|. The destination is
// walked in 16x16 luma-sized steps; for planes 1 and 2 the coordinates and
// block size are halved. |phase_scaler| is added to the computed source
// positions, and |bd| is passed to the high-bitdepth convolve when the source
// is flagged YV12_FLAG_HIGHBITDEPTH.
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                   YV12_BUFFER_CONFIG *dst, int bd,
                                   INTERP_FILTER filter_type,
                                   int phase_scaler) {
  const int src_w = src->y_crop_width;
  const int src_h = src->y_crop_height;
  const int dst_w = dst->y_crop_width;
  const int dst_h = dst->y_crop_height;
  const uint8_t *const in_bufs[3] = { src->y_buffer, src->u_buffer,
                                      src->v_buffer };
  const int in_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
  uint8_t *const out_bufs[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
  const int out_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
  const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
  int plane, row, col;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    // 1 for the first plane, 2 for the others.
    const int factor = (plane == 0 || plane == 3) ? 1 : 2;
    // Edge length of the block produced per convolve call for this plane.
    const int blk = 16 / factor;
    const int src_stride = in_strides[plane];
    const int dst_stride = out_strides[plane];

    for (row = 0; row < dst_h; row += 16) {
      const int y_q4 = row * (16 / factor) * src_h / dst_h + phase_scaler;

      for (col = 0; col < dst_w; col += 16) {
        const int x_q4 = col * (16 / factor) * src_w / dst_w + phase_scaler;
        const uint8_t *src_ptr = in_bufs[plane] +
                                 (row / factor) * src_h / dst_h * src_stride +
                                 (col / factor) * src_w / dst_w;
        uint8_t *dst_ptr =
            out_bufs[plane] + (row / factor) * dst_stride + (col / factor);

        if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
          vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                               CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, kernel,
                               x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
                               16 * src_h / dst_h, blk, blk, bd);
        } else {
          vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
                        x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
                        16 * src_h / dst_h, blk, blk);
        }
      }
    }
  }

  vpx_extend_frame_borders(dst);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
3096
3097 #if !CONFIG_REALTIME_ONLY
3098 static int scale_down(VP9_COMP *cpi, int q) {
3099   RATE_CONTROL *const rc = &cpi->rc;
3100   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3101   int scale = 0;
3102   assert(frame_is_kf_gf_arf(cpi));
3103
3104   if (rc->frame_size_selector == UNSCALED &&
3105       q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
3106     const int max_size_thresh =
3107         (int)(rate_thresh_mult[SCALE_STEP1] *
3108               VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
3109     scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
3110   }
3111   return scale;
3112 }
3113
3114 static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
3115   const RATE_CONTROL *const rc = &cpi->rc;
3116   int big_miss_high;
3117
3118   if (frame_is_kf_gf_arf(cpi))
3119     big_miss_high = rc->this_frame_target * 3 / 2;
3120   else
3121     big_miss_high = rc->this_frame_target * 2;
3122
3123   return big_miss_high;
3124 }
3125
3126 static int big_rate_miss(VP9_COMP *cpi) {
3127   const RATE_CONTROL *const rc = &cpi->rc;
3128   int big_miss_high;
3129   int big_miss_low;
3130
3131   // Ignore for overlay frames
3132   if (rc->is_src_frame_alt_ref) {
3133     return 0;
3134   } else {
3135     big_miss_low = (rc->this_frame_target / 2);
3136     big_miss_high = big_rate_miss_high_threshold(cpi);
3137
3138     return (rc->projected_frame_size > big_miss_high) ||
3139            (rc->projected_frame_size < big_miss_low);
3140   }
3141 }
3142
3143 // test in two pass for the first
3144 static int two_pass_first_group_inter(VP9_COMP *cpi) {
3145   if (cpi->oxcf.pass == 2) {
3146     TWO_PASS *const twopass = &cpi->twopass;
3147     GF_GROUP *const gf_group = &twopass->gf_group;
3148     const int gfg_index = gf_group->index;
3149
3150     if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
3151     return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
3152            gf_group->update_type[gfg_index] == LF_UPDATE;
3153   } else {
3154     return 0;
3155   }
3156 }
3157
3158 // Function to test for conditions that indicate we should loop
3159 // back and recode a frame.
3160 static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
3161                             int maxq, int minq) {
3162   const RATE_CONTROL *const rc = &cpi->rc;
3163   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
3164   const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
3165   int force_recode = 0;
3166
3167   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3168       big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) ||
3169       (two_pass_first_group_inter(cpi) &&
3170        (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) ||
3171       (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) {
3172     if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
3173         scale_down(cpi, q)) {
3174       // Code this group at a lower resolution.
3175       cpi->resize_pending = 1;
3176       return 1;
3177     }
3178
3179     // Force recode for extreme overshoot.
3180     if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3181         (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
3182          rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) {
3183       return 1;
3184     }
3185
3186     // TODO(agrange) high_limit could be greater than the scale-down threshold.
3187     if ((rc->projected_frame_size > high_limit && q < maxq) ||
3188         (rc->projected_frame_size < low_limit && q > minq)) {
3189       force_recode = 1;
3190     } else if (cpi->oxcf.rc_mode == VPX_CQ) {
3191       // Deal with frame undershoot and whether or not we are
3192       // below the automatically set cq level.
3193       if (q > oxcf->cq_level &&
3194           rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
3195         force_recode = 1;
3196       }
3197     }
3198   }
3199   return force_recode;
3200 }
3201 #endif  // !CONFIG_REALTIME_ONLY
3202
// Updates the encoder's reference-slot bookkeeping once the current frame has
// been encoded. Depending on the frame type and the refresh flags it
// re-points entries of cm->ref_frame_map at cm->new_fb_idx (via ref_cnt_fb),
// swaps or reassigns the lst/gld/alt slot indices, maintains the ARF index
// stack held in the GF group, and copies the matching
// interp_filter_selected statistics rows.
static void update_ref_frames(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  BufferPool *const pool = cm->buffer_pool;
  GF_GROUP *const gf_group = &cpi->twopass.gf_group;

  if (cpi->rc.show_arf_as_gld) {
    // Exchange the golden and alt-ref slot indices.
    int tmp = cpi->alt_fb_idx;
    cpi->alt_fb_idx = cpi->gld_fb_idx;
    cpi->gld_fb_idx = tmp;
  } else if (cm->show_existing_frame) {
    // Pop ARF.
    // The shown ARF slot becomes the last-frame slot; the next alt-ref slot
    // index is taken from the ARF index stack.
    cpi->lst_fb_idx = cpi->alt_fb_idx;
    cpi->alt_fb_idx =
        stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
    --gf_group->stack_size;
  }

  // At this point the new frame has been encoded.
  // If any buffer copy / swapping is signaled it should be done here.
  if (cm->frame_type == KEY_FRAME) {
    // Key frame: both the golden and the alt-ref slots reference the new
    // buffer.
    ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
               cm->new_fb_idx);
    ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
               cm->new_fb_idx);
  } else if (vp9_preserve_existing_gf(cpi)) {
    // We have decided to preserve the previously existing golden frame as our
    // new ARF frame. However, in the short term in function
    // vp9_get_refresh_mask() we left it in the GF slot and, if
    // we're updating the GF with the current decoded frame, we save it to the
    // ARF slot instead.
    // We now have to update the ARF with the current frame and swap gld_fb_idx
    // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
    // slot and, if we're updating the GF, the current frame becomes the new GF.
    int tmp;

    ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
               cm->new_fb_idx);

    tmp = cpi->alt_fb_idx;
    cpi->alt_fb_idx = cpi->gld_fb_idx;
    cpi->gld_fb_idx = tmp;
  } else { /* For non key/golden frames */
    if (cpi->refresh_alt_ref_frame) {
      int arf_idx = gf_group->top_arf_idx;

      // Push new ARF into stack.
      // Note: the value pushed is the current alt_fb_idx (the slot being
      // replaced as the active ARF); alt_fb_idx is switched to arf_idx below.
      stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
                 gf_group->stack_size);
      ++gf_group->stack_size;

      assert(arf_idx < REF_FRAMES);

      ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
      // Row 0 of interp_filter_selected is copied into the ALTREF row.
      memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
             cpi->interp_filter_selected[0],
             sizeof(cpi->interp_filter_selected[0]));

      cpi->alt_fb_idx = arf_idx;
    }

    if (cpi->refresh_golden_frame) {
      ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
                 cm->new_fb_idx);
      // For an overlay frame (source is the alt-ref) the golden row inherits
      // the ALTREF row; otherwise it takes row 0.
      if (!cpi->rc.is_src_frame_alt_ref)
        memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
               cpi->interp_filter_selected[0],
               sizeof(cpi->interp_filter_selected[0]));
      else
        memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
               cpi->interp_filter_selected[ALTREF_FRAME],
               sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
    }
  }

  // The last-frame slot is refreshed independently of the frame-type branch
  // above.
  if (cpi->refresh_last_frame) {
    ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
               cm->new_fb_idx);
    if (!cpi->rc.is_src_frame_alt_ref)
      memcpy(cpi->interp_filter_selected[LAST_FRAME],
             cpi->interp_filter_selected[0],
             sizeof(cpi->interp_filter_selected[0]));
  }

  // A mid-group overlay restores the alt-ref slot index from the ARF stack.
  if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
    cpi->alt_fb_idx =
        stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
    --gf_group->stack_size;
  }
}
3292
3293 void vp9_update_reference_frames(VP9_COMP *cpi) {
3294   update_ref_frames(cpi);
3295
3296 #if CONFIG_VP9_TEMPORAL_DENOISING
3297   vp9_denoiser_update_ref_frame(cpi);
3298 #endif
3299
3300   if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
3301 }
3302
3303 static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
3304   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
3305   struct loopfilter *lf = &cm->lf;
3306   int is_reference_frame =
3307       (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
3308        cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
3309   if (cpi->use_svc &&
3310       cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
3311     is_reference_frame = !cpi->svc.non_reference_frame;
3312
3313   // Skip loop filter in show_existing_frame mode.
3314   if (cm->show_existing_frame) {
3315     lf->filter_level = 0;
3316     return;
3317   }
3318
3319   if (xd->lossless) {
3320     lf->filter_level = 0;
3321     lf->last_filt_level = 0;
3322   } else {
3323     struct vpx_usec_timer timer;
3324
3325     vpx_clear_system_state();
3326
3327     vpx_usec_timer_start(&timer);
3328
3329     if (!cpi->rc.is_src_frame_alt_ref) {
3330       if ((cpi->common.frame_type == KEY_FRAME) &&
3331           (!cpi->rc.this_key_frame_forced)) {
3332         lf->last_filt_level = 0;
3333       }
3334       vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
3335       lf->last_filt_level = lf->filter_level;
3336     } else {
3337       lf->filter_level = 0;
3338     }
3339
3340     vpx_usec_timer_mark(&timer);
3341     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
3342   }
3343
3344   if (lf->filter_level > 0 && is_reference_frame) {
3345     vp9_build_mask_frame(cm, lf->filter_level, 0);
3346
3347     if (cpi->num_workers > 1)
3348       vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
3349                                lf->filter_level, 0, 0, cpi->workers,
3350                                cpi->num_workers, &cpi->lf_row_sync);
3351     else
3352       vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
3353   }
3354
3355   vpx_extend_frame_inner_borders(cm->frame_to_show);
3356 }
3357
3358 static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
3359   RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
3360   if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
3361       new_fb_ptr->mi_cols < cm->mi_cols) {
3362     vpx_free(new_fb_ptr->mvs);
3363     CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
3364                     (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
3365                                          sizeof(*new_fb_ptr->mvs)));
3366     new_fb_ptr->mi_rows = cm->mi_rows;
3367     new_fb_ptr->mi_cols = cm->mi_cols;
3368   }
3369 }
3370
3371 void vp9_scale_references(VP9_COMP *cpi) {
3372   VP9_COMMON *cm = &cpi->common;
3373   MV_REFERENCE_FRAME ref_frame;
3374   const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG,
3375                                      VP9_ALT_FLAG };
3376
3377   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3378     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
3379     if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
3380       BufferPool *const pool = cm->buffer_pool;
3381       const YV12_BUFFER_CONFIG *const ref =
3382           get_ref_frame_buffer(cpi, ref_frame);
3383
3384       if (ref == NULL) {
3385         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3386         continue;
3387       }
3388
3389 #if CONFIG_VP9_HIGHBITDEPTH
3390       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3391         RefCntBuffer *new_fb_ptr = NULL;
3392         int force_scaling = 0;
3393         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3394         if (new_fb == INVALID_IDX) {
3395           new_fb = get_free_fb(cm);
3396           force_scaling = 1;
3397         }
3398         if (new_fb == INVALID_IDX) return;
3399         new_fb_ptr = &pool->frame_bufs[new_fb];
3400         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3401             new_fb_ptr->buf.y_crop_height != cm->height) {
3402           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3403                                        cm->subsampling_x, cm->subsampling_y,
3404                                        cm->use_highbitdepth,
3405                                        VP9_ENC_BORDER_IN_PIXELS,
3406                                        cm->byte_alignment, NULL, NULL, NULL))
3407             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3408                                "Failed to allocate frame buffer");
3409           scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth,
3410                                  EIGHTTAP, 0);
3411           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3412           alloc_frame_mvs(cm, new_fb);
3413         }
3414 #else
3415       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3416         RefCntBuffer *new_fb_ptr = NULL;
3417         int force_scaling = 0;
3418         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3419         if (new_fb == INVALID_IDX) {
3420           new_fb = get_free_fb(cm);
3421           force_scaling = 1;
3422         }
3423         if (new_fb == INVALID_IDX) return;
3424         new_fb_ptr = &pool->frame_bufs[new_fb];
3425         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3426             new_fb_ptr->buf.y_crop_height != cm->height) {
3427           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3428                                        cm->subsampling_x, cm->subsampling_y,
3429                                        VP9_ENC_BORDER_IN_PIXELS,
3430                                        cm->byte_alignment, NULL, NULL, NULL))
3431             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3432                                "Failed to allocate frame buffer");
3433           vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
3434           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3435           alloc_frame_mvs(cm, new_fb);
3436         }
3437 #endif  // CONFIG_VP9_HIGHBITDEPTH
3438       } else {
3439         int buf_idx;
3440         RefCntBuffer *buf = NULL;
3441         if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3442           // Check for release of scaled reference.
3443           buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
3444           if (buf_idx != INVALID_IDX) {
3445             buf = &pool->frame_bufs[buf_idx];
3446             --buf->ref_count;
3447             cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3448           }
3449         }
3450         buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3451         buf = &pool->frame_bufs[buf_idx];
3452         buf->buf.y_crop_width = ref->y_crop_width;
3453         buf->buf.y_crop_height = ref->y_crop_height;
3454         cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
3455         ++buf->ref_count;
3456       }
3457     } else {
3458       if (cpi->oxcf.pass != 0 || cpi->use_svc)
3459         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3460     }
3461   }
3462 }
3463
3464 static void release_scaled_references(VP9_COMP *cpi) {
3465   VP9_COMMON *cm = &cpi->common;
3466   int i;
3467   if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3468     // Only release scaled references under certain conditions:
3469     // if reference will be updated, or if scaled reference has same resolution.
3470     int refresh[3];
3471     refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
3472     refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
3473     refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
3474     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3475       const int idx = cpi->scaled_ref_idx[i - 1];
3476       if (idx != INVALID_IDX) {
3477         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3478         const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
3479         if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
3480                                buf->buf.y_crop_height == ref->y_crop_height)) {
3481           --buf->ref_count;
3482           cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
3483         }
3484       }
3485     }
3486   } else {
3487     for (i = 0; i < REFS_PER_FRAME; ++i) {
3488       const int idx = cpi->scaled_ref_idx[i];
3489       if (idx != INVALID_IDX) {
3490         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3491         --buf->ref_count;
3492         cpi->scaled_ref_idx[i] = INVALID_IDX;
3493       }
3494     }
3495   }
3496 }
3497
3498 static void full_to_model_count(unsigned int *model_count,
3499                                 unsigned int *full_count) {
3500   int n;
3501   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
3502   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
3503   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
3504   for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
3505     model_count[TWO_TOKEN] += full_count[n];
3506   model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
3507 }
3508
3509 static void full_to_model_counts(vp9_coeff_count_model *model_count,
3510                                  vp9_coeff_count *full_count) {
3511   int i, j, k, l;
3512
3513   for (i = 0; i < PLANE_TYPES; ++i)
3514     for (j = 0; j < REF_TYPES; ++j)
3515       for (k = 0; k < COEF_BANDS; ++k)
3516         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
3517           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
3518 }
3519
3520 #if 0 && CONFIG_INTERNAL_STATS
3521 static void output_frame_level_debug_stats(VP9_COMP *cpi) {
3522   VP9_COMMON *const cm = &cpi->common;
3523   FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
3524   int64_t recon_err;
3525
3526   vpx_clear_system_state();
3527
3528 #if CONFIG_VP9_HIGHBITDEPTH
3529   if (cm->use_highbitdepth) {
3530     recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3531   } else {
3532     recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3533   }
3534 #else
3535   recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3536 #endif  // CONFIG_VP9_HIGHBITDEPTH
3537
3538
3539   if (cpi->twopass.total_left_stats.coded_error != 0.0) {
3540     double dc_quant_devisor;
3541 #if CONFIG_VP9_HIGHBITDEPTH
3542     switch (cm->bit_depth) {
3543       case VPX_BITS_8:
3544         dc_quant_devisor = 4.0;
3545         break;
3546       case VPX_BITS_10:
3547         dc_quant_devisor = 16.0;
3548         break;
3549       default:
3550         assert(cm->bit_depth == VPX_BITS_12);
3551         dc_quant_devisor = 64.0;
3552         break;
3553     }
3554 #else
3555     dc_quant_devisor = 4.0;
3556 #endif
3557
3558     if (!cm->current_video_frame) {
3559       fprintf(f, "frame, width, height, last ts, last end ts, "
3560           "source_alt_ref_pending, source_alt_ref_active, "
3561           "this_frame_target, projected_frame_size, "
3562           "projected_frame_size / MBs, "
3563           "projected_frame_size - this_frame_target, "
3564           "vbr_bits_off_target, vbr_bits_off_target_fast, "
3565           "twopass.extend_minq, twopass.extend_minq_fast, "
3566           "total_target_vs_actual, "
3567           "starting_buffer_level - bits_off_target, "
3568           "total_actual_bits, base_qindex, q for base_qindex, "
3569           "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
3570           "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
3571           "frame_type, gfu_boost, "
3572           "twopass.bits_left, "
3573           "twopass.total_left_stats.coded_error, "
3574           "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
3575           "tot_recode_hits, recon_err, kf_boost, "
3576           "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
3577           "filter_level, seg.aq_av_offset\n");
3578     }
3579
3580     fprintf(f, "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
3581         "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
3582         "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
3583         "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
3584         "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
3585         cpi->common.current_video_frame,
3586         cm->width, cm->height,
3587         cpi->last_time_stamp_seen,
3588         cpi->last_end_time_stamp_seen,
3589         cpi->rc.source_alt_ref_pending,
3590         cpi->rc.source_alt_ref_active,
3591         cpi->rc.this_frame_target,
3592         cpi->rc.projected_frame_size,
3593         cpi->rc.projected_frame_size / cpi->common.MBs,
3594         (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
3595         cpi->rc.vbr_bits_off_target,
3596         cpi->rc.vbr_bits_off_target_fast,
3597         cpi->twopass.extend_minq,
3598         cpi->twopass.extend_minq_fast,
3599         cpi->rc.total_target_vs_actual,
3600         (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
3601         cpi->rc.total_actual_bits, cm->base_qindex,
3602         vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
3603         (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
3604             dc_quant_devisor,
3605         vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
3606                                 cm->bit_depth),
3607         cpi->rc.avg_q,
3608         vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
3609         cpi->refresh_last_frame, cpi->refresh_golden_frame,
3610         cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
3611         cpi->twopass.bits_left,
3612         cpi->twopass.total_left_stats.coded_error,
3613         cpi->twopass.bits_left /
3614             (1 + cpi->twopass.total_left_stats.coded_error),
3615         cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
3616         cpi->twopass.kf_zeromotion_pct,
3617         cpi->twopass.fr_content_type,
3618         cm->lf.filter_level,
3619         cm->seg.aq_av_offset);
3620   }
3621   fclose(f);
3622
3623   if (0) {
3624     FILE *const fmodes = fopen("Modes.stt", "a");
3625     int i;
3626
3627     fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
3628             cm->frame_type, cpi->refresh_golden_frame,
3629             cpi->refresh_alt_ref_frame);
3630
3631     for (i = 0; i < MAX_MODES; ++i)
3632       fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
3633
3634     fprintf(fmodes, "\n");
3635
3636     fclose(fmodes);
3637   }
3638 }
3639 #endif
3640
3641 static void set_mv_search_params(VP9_COMP *cpi) {
3642   const VP9_COMMON *const cm = &cpi->common;
3643   const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
3644
3645   // Default based on max resolution.
3646   cpi->mv_step_param = vp9_init_search_range(max_mv_def);
3647
3648   if (cpi->sf.mv.auto_mv_step_size) {
3649     if (frame_is_intra_only(cm)) {
3650       // Initialize max_mv_magnitude for use in the first INTER frame
3651       // after a key/intra-only frame.
3652       cpi->max_mv_magnitude = max_mv_def;
3653     } else {
3654       if (cm->show_frame) {
3655         // Allow mv_steps to correspond to twice the max mv magnitude found
3656         // in the previous frame, capped by the default max_mv_magnitude based
3657         // on resolution.
3658         cpi->mv_step_param = vp9_init_search_range(
3659             VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
3660       }
3661       cpi->max_mv_magnitude = 0;
3662     }
3663   }
3664 }
3665
3666 static void set_size_independent_vars(VP9_COMP *cpi) {
3667   vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
3668   vp9_set_rd_speed_thresholds(cpi);
3669   vp9_set_rd_speed_thresholds_sub8x8(cpi);
3670   cpi->common.interp_filter = cpi->sf.default_interp_filter;
3671 }
3672
3673 static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
3674                                     int *top_index) {
3675   VP9_COMMON *const cm = &cpi->common;
3676
3677   // Setup variables that depend on the dimensions of the frame.
3678   vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
3679
3680   // Decide q and q bounds.
3681   *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
3682
3683   if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
3684     *q = cpi->rc.worst_quality;
3685     cpi->rc.force_max_q = 0;
3686   }
3687
3688   if (!frame_is_intra_only(cm)) {
3689     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
3690   }
3691
3692 #if !CONFIG_REALTIME_ONLY
3693   // Configure experimental use of segmentation for enhanced coding of
3694   // static regions if indicated.
3695   // Only allowed in the second pass of a two pass encode, as it requires
3696   // lagged coding, and if the relevant speed feature flag is set.
3697   if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
3698     configure_static_seg_features(cpi);
3699 #endif  // !CONFIG_REALTIME_ONLY
3700
3701 #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
3702   if (cpi->oxcf.noise_sensitivity > 0) {
3703     int l = 0;
3704     switch (cpi->oxcf.noise_sensitivity) {
3705       case 1: l = 20; break;
3706       case 2: l = 40; break;
3707       case 3: l = 60; break;
3708       case 4:
3709       case 5: l = 100; break;
3710       case 6: l = 150; break;
3711     }
3712     if (!cpi->common.postproc_state.limits) {
3713       cpi->common.postproc_state.limits =
3714           vpx_calloc(cpi->un_scaled_source->y_width,
3715                      sizeof(*cpi->common.postproc_state.limits));
3716     }
3717     vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
3718                 cpi->common.postproc_state.limits);
3719   }
3720 #endif  // CONFIG_VP9_POSTPROC
3721 }
3722
3723 static void init_motion_estimation(VP9_COMP *cpi) {
3724   int y_stride = cpi->scaled_source.y_stride;
3725
3726   if (cpi->sf.mv.search_method == NSTEP) {
3727     vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
3728   } else if (cpi->sf.mv.search_method == DIAMOND) {
3729     vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
3730   }
3731 }
3732
// Establishes the coded frame size for the current frame and (re)allocates
// everything that depends on it:
//  1. Applies a pending resize (two-pass VBR fixed/dynamic resize, or
//     one-pass CBR dynamic resize) via vp9_set_size_literal().
//  2. Updates the two-pass target rate for non-SVC encodes.
//  3. Ensures the new frame buffer has motion vector storage and is sized for
//     cm->width x cm->height, then refreshes the utility buffers and the
//     motion search configuration.
//  4. Rebuilds cm->frame_refs[] with scale factors relating each reference
//     buffer to the current frame size.
static void set_frame_size(VP9_COMP *cpi) {
  int ref_frame;
  VP9_COMMON *const cm = &cpi->common;
  VP9EncoderConfig *const oxcf = &cpi->oxcf;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;

#if !CONFIG_REALTIME_ONLY
  // Two-pass VBR: fixed resize is applied on the first frame only, dynamic
  // resize whenever one is pending.
  if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
      ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
       (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
    calculate_coded_size(cpi, &oxcf->scaled_frame_width,
                         &oxcf->scaled_frame_height);

    // There has been a change in frame size.
    vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
                         oxcf->scaled_frame_height);
  }
#endif  // !CONFIG_REALTIME_ONLY

  // One-pass CBR dynamic resize.
  if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
      oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
    // For SVC scaled width/height will have been set (svc->resize_set=1)
    // in get_svc_params based on the layer width/height.
    if (!cpi->use_svc || !cpi->svc.resize_set) {
      // Scale the configured size by resize_scale_num / resize_scale_den.
      oxcf->scaled_frame_width =
          (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
      oxcf->scaled_frame_height =
          (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den;
      // There has been a change in frame size.
      vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
                           oxcf->scaled_frame_height);
    }

    // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
    set_mv_search_params(cpi);

    // Restart noise estimation for the new dimensions.
    vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
#if CONFIG_VP9_TEMPORAL_DENOISING
    // Reset the denoiser on the resized frame.
    if (cpi->oxcf.noise_sensitivity > 0) {
      vp9_denoiser_free(&(cpi->denoiser));
      setup_denoiser_buffer(cpi);
      // Dynamic resize is only triggered for non-SVC, so we can force
      // golden frame update here as temporary fix to denoiser.
      cpi->refresh_golden_frame = 1;
    }
#endif
  }

  if ((oxcf->pass == 2) && !cpi->use_svc) {
    vp9_set_target_rate(cpi);
  }

  // Make sure the new frame buffer has MV storage for the current mi grid.
  alloc_frame_mvs(cm, cm->new_fb_idx);

  // Reset the frame pointers to the current frame size.
  if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
                               cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                               cm->use_highbitdepth,
#endif
                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                               NULL, NULL, NULL))
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate frame buffer");

  alloc_util_frame_buffers(cpi);
  init_motion_estimation(cpi);

  // Bind each reference slot to its buffer and compute the scale factors
  // from the reference's cropped size to the current frame size.
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
    const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);

    ref_buf->idx = buf_idx;

    if (buf_idx != INVALID_IDX) {
      YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
      ref_buf->buf = buf;
#if CONFIG_VP9_HIGHBITDEPTH
      vp9_setup_scale_factors_for_frame(
          &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
          cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
#else
      vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
                                        buf->y_crop_height, cm->width,
                                        cm->height);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      // References that will be read through a scaler get their full borders
      // extended.
      if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
    } else {
      ref_buf->buf = NULL;
    }
  }

  set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
3828
3829 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3830 static void save_encode_params(VP9_COMP *cpi) {
3831   VP9_COMMON *const cm = &cpi->common;
3832   const int tile_cols = 1 << cm->log2_tile_cols;
3833   const int tile_rows = 1 << cm->log2_tile_rows;
3834   int tile_col, tile_row;
3835   int i, j;
3836   RD_OPT *rd_opt = &cpi->rd;
3837   for (i = 0; i < MAX_REF_FRAMES; i++) {
3838     for (j = 0; j < REFERENCE_MODES; j++)
3839       rd_opt->prediction_type_threshes_prev[i][j] =
3840           rd_opt->prediction_type_threshes[i][j];
3841
3842     for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
3843       rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
3844   }
3845
3846   if (cpi->tile_data != NULL) {
3847     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
3848       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
3849         TileDataEnc *tile_data =
3850             &cpi->tile_data[tile_row * tile_cols + tile_col];
3851         for (i = 0; i < BLOCK_SIZES; ++i) {
3852           for (j = 0; j < MAX_MODES; ++j) {
3853             tile_data->thresh_freq_fact_prev[i][j] =
3854                 tile_data->thresh_freq_fact[i][j];
3855           }
3856         }
3857       }
3858   }
3859 }
3860 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3861
3862 static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
3863 #ifdef ENABLE_KF_DENOISE
3864   if (is_spatial_denoise_enabled(cpi)) {
3865     cpi->raw_source_frame = vp9_scale_if_required(
3866         cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3867         (oxcf->pass == 0), EIGHTTAP, 0);
3868   } else {
3869     cpi->raw_source_frame = cpi->Source;
3870   }
3871 #else
3872   cpi->raw_source_frame = cpi->Source;
3873 #endif
3874 }
3875
// Encodes the current frame once, i.e. without the iterative rate-control
// recode loop. The only possible second call to vp9_encode_frame() is the
// re-encode near the end, taken when sf.overshoot_detection_cbr_rt is
// RE_ENCODE_MAXQ, high source SAD was flagged, and
// vp9_encodedframe_overshoot() returns nonzero for the trial-packed size.
//
// cpi:  encoder instance; many of its fields are updated along the way.
// size: only handed to vp9_pack_bitstream() for that trial pack, and read
//       back afterwards to estimate the frame size in bits.
// dest: output buffer handed to the same trial pack.
//
// Returns 1 when the frame was processed (including the early exit taken
// when cm->show_existing_frame is set), or 0 when vp9_rc_drop_frame()
// returned nonzero and the frame is dropped.
3876 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
3877                                       uint8_t *dest) {
3878   VP9_COMMON *const cm = &cpi->common;
3879   SVC *const svc = &cpi->svc;
3880   int q = 0, bottom_index = 0, top_index = 0;
3881   int no_drop_scene_change = 0;
       // Scaler filter and phase: taken from the per-spatial-layer SVC settings
       // for one-pass CBR SVC, otherwise EIGHTTAP with phase 0.
3882   const INTERP_FILTER filter_scaler =
3883       (is_one_pass_cbr_svc(cpi))
3884           ? svc->downsample_filter_type[svc->spatial_layer_id]
3885           : EIGHTTAP;
3886   const int phase_scaler =
3887       (is_one_pass_cbr_svc(cpi))
3888           ? svc->downsample_filter_phase[svc->spatial_layer_id]
3889           : 0;
3890
       // Nothing is encoded for a show-existing frame: zero the frame target,
       // set the raw source if PSNR is being computed, and report success.
3891   if (cm->show_existing_frame) {
3892     cpi->rc.this_frame_target = 0;
3893     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
3894     return 1;
3895   }
3896
3897   svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
3898
3899   // Flag to check if it's valid to compute the source sad (used for
3900   // scene detection and for superblock content state in CBR mode).
3901   // The flag may get reset below based on SVC or resizing state.
3902   cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;
3903
3904   vpx_clear_system_state();
3905
3906   set_frame_size(cpi);
3907
3908   if (is_one_pass_cbr_svc(cpi) &&
3909       cpi->un_scaled_source->y_width == cm->width << 2 &&
3910       cpi->un_scaled_source->y_height == cm->height << 2 &&
3911       svc->scaled_temp.y_width == cm->width << 1 &&
3912       svc->scaled_temp.y_height == cm->height << 1) {
3913     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
3914     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
3915     // result will be saved in scaled_temp and might be used later.
3916     const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
3917     const int phase_scaler2 = svc->downsample_filter_phase[1];
3918     cpi->Source = vp9_svc_twostage_scale(
3919         cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
3920         filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
3921     svc->scaled_one_half = 1;
3922   } else if (is_one_pass_cbr_svc(cpi) &&
3923              cpi->un_scaled_source->y_width == cm->width << 1 &&
3924              cpi->un_scaled_source->y_height == cm->height << 1 &&
3925              svc->scaled_one_half) {
3926     // If the spatial layer is 1/2x1/2 and the scaling is already done in the
3927     // two-stage scaling, use the result directly.
3928     cpi->Source = &svc->scaled_temp;
3929     svc->scaled_one_half = 0;
3930   } else {
3931     cpi->Source = vp9_scale_if_required(
3932         cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
3933         filter_scaler, phase_scaler);
3934   }
3935 #ifdef OUTPUT_YUV_SVC_SRC
3936   // Write out at most 3 spatial layers.
3937   if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
3938     vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
3939   }
3940 #endif
3941   // Unfiltered raw source used in metrics calculation if the source
3942   // has been filtered.
3943   if (is_psnr_calc_enabled(cpi)) {
3944 #ifdef ENABLE_KF_DENOISE
3945     if (is_spatial_denoise_enabled(cpi)) {
           // NOTE(review): this call passes phase_scaler, whereas the otherwise
           // identical call in set_raw_source_frame() passes 0 - confirm which
           // phase is intended.
3946       cpi->raw_source_frame = vp9_scale_if_required(
3947           cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3948           (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
3949     } else {
3950       cpi->raw_source_frame = cpi->Source;
3951     }
3952 #else
3953     cpi->raw_source_frame = cpi->Source;
3954 #endif
3955   }
3956
       // Disable the source-sad computation, and clear the per-superblock
       // content state, for SVC frames that are not the top spatial and top
       // temporal layer (or that belong to the very first superframe), and
       // whenever a resize is pending, in progress or externally requested.
       // NOTE(review): cpi->resize_state is tested twice below (as a truth
       // value and against ORIG) - one of the two tests looks redundant.
3957   if ((cpi->use_svc &&
3958        (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
3959         svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
3960         svc->current_superframe < 1)) ||
3961       cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
3962       cpi->resize_state != ORIG) {
3963     cpi->compute_source_sad_onepass = 0;
3964     if (cpi->content_state_sb_fd != NULL)
3965       memset(cpi->content_state_sb_fd, 0,
3966              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
3967                  sizeof(*cpi->content_state_sb_fd));
3968   }
3969
3970   // Avoid scaling last_source unless it's needed.
3971   // Last source is needed if avg_source_sad() is used, or if
3972   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
3973   // estimation is enabled.
3974   if (cpi->unscaled_last_source != NULL &&
3975       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3976        (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
3977         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
3978        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
3979        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
3980        cpi->compute_source_sad_onepass))
3981     cpi->Last_Source = vp9_scale_if_required(
3982         cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
3983         (cpi->oxcf.pass == 0), EIGHTTAP, 0);
3984
       // The source sad also needs a last source whose luma dimensions match
       // those of the current source.
3985   if (cpi->Last_Source == NULL ||
3986       cpi->Last_Source->y_width != cpi->Source->y_width ||
3987       cpi->Last_Source->y_height != cpi->Source->y_height)
3988     cpi->compute_source_sad_onepass = 0;
3989
       // Clear cpi->consec_zero_mv on intra-only frames and while a resize is
       // pending.
3990   if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
3991     memset(cpi->consec_zero_mv, 0,
3992            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
3993   }
3994
3995 #if CONFIG_VP9_TEMPORAL_DENOISING
3996   if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
3997     vp9_denoiser_reset_on_first_frame(cpi);
3998 #endif
3999
4000   // Scene detection is always used for VBR mode or screen-content case.
4001   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
4002   // (need to check encoding time cost for doing this for speed 8).
4003   cpi->rc.high_source_sad = 0;
4004   cpi->rc.hybrid_intra_scene_change = 0;
4005   cpi->rc.re_encode_maxq_scene_change = 0;
4006   if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
4007       (cpi->oxcf.rc_mode == VPX_VBR ||
4008        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
4009        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
4010     vp9_scene_detection_onepass(cpi);
4011
4012   if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
4013     svc->high_source_sad_superframe = cpi->rc.high_source_sad;
4014     svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
4015     // On scene change reset temporal layer pattern to TL0.
4016     // Note that if the base/lower spatial layers are skipped: instead of
4017     // inserting base layer here, we force max-q for the next superframe
4018     // with lower spatial layers: this is done in vp9_encodedframe_overshoot()
4019     // when max-q is decided for the current layer.
4020     // Only do this reset for bypass/flexible mode.
4021     if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
4022         svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
4023       // rc->high_source_sad will get reset so copy it to restore it.
4024       int tmp_high_source_sad = cpi->rc.high_source_sad;
4025       vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
4026       cpi->rc.high_source_sad = tmp_high_source_sad;
4027     }
4028   }
4029
4030   vp9_update_noise_estimate(cpi);
4031
4032   // For 1 pass CBR, check if we are dropping this frame.
4033   // Never drop on key frame, if base layer is key for svc,
4034   // on scene change, or if superframe has layer sync.
4035   if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
4036       !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
4037     no_drop_scene_change = 1;
4038   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
4039       !frame_is_intra_only(cm) && !no_drop_scene_change &&
4040       !svc->superframe_has_layer_sync &&
4041       (!cpi->use_svc ||
4042        !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
4043     if (vp9_rc_drop_frame(cpi)) return 0;
4044   }
4045
4046   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
4047   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
4048   // avoid this frame-level upsampling (for non intra_only frames).
4049   // For SVC single_layer mode, dynamic resize is allowed and we need to
4050   // scale references for this case.
4051   if (frame_is_intra_only(cm) == 0 &&
4052       ((svc->single_layer_svc && cpi->oxcf.resize_mode == RESIZE_DYNAMIC) ||
4053        !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref))) {
4054     vp9_scale_references(cpi);
4055   }
4056
4057   set_size_independent_vars(cpi);
4058   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4059
4060   // search method and step parameter might be changed in speed settings.
4061   init_motion_estimation(cpi);
4062
4063   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
4064
       // Allocate svc->prev_partition_svc on first use, on the second-highest
       // spatial layer, when sf.svc_use_lowres_part is set.
4065   if (cpi->sf.svc_use_lowres_part &&
4066       svc->spatial_layer_id == svc->number_spatial_layers - 2) {
4067     if (svc->prev_partition_svc == NULL) {
4068       CHECK_MEM_ERROR(
4069           cm, svc->prev_partition_svc,
4070           (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
4071                                    sizeof(*svc->prev_partition_svc)));
4072     }
4073   }
4074
4075   // TODO(jianj): Look into issue of skin detection with high bitdepth.
4076   if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
4077       cpi->oxcf.rc_mode == VPX_CBR &&
4078       cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
4079       cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4080     cpi->use_skin_detection = 1;
4081   }
4082
4083   // Enable post encode frame dropping for CBR on non key frame, when
4084   // ext_use_post_encode_drop is specified by user.
4085   cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
4086                                  cpi->oxcf.rc_mode == VPX_CBR &&
4087                                  cm->frame_type != KEY_FRAME;
4088
4089   vp9_set_quantizer(cpi, q);
4090   vp9_set_variance_partition_thresholds(cpi, q, 0);
4091
4092   setup_frame(cpi);
4093
4094   suppress_active_map(cpi);
4095
4096   if (cpi->use_svc) {
4097     // On non-zero spatial layer, check for disabling inter-layer
4098     // prediction.
4099     if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
4100     vp9_svc_assert_constraints_pattern(cpi);
4101   }
4102
       // If rc.last_post_encode_dropped_scene_change is set, force the
       // high-source-sad flags on for this frame and clear that flag.
4103   if (cpi->rc.last_post_encode_dropped_scene_change) {
4104     cpi->rc.high_source_sad = 1;
4105     svc->high_source_sad_superframe = 1;
4106     // For now disable use_source_sad since Last_Source will not be the previous
4107     // encoded but the dropped one.
4108     cpi->sf.use_source_sad = 0;
4109     cpi->rc.last_post_encode_dropped_scene_change = 0;
4110   }
4111   // Check if this high_source_sad (scene/slide change) frame should be
4112   // encoded at high/max QP, and if so, set the q and adjust some rate
4113   // control parameters.
4114   if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
4115       (cpi->rc.high_source_sad ||
4116        (cpi->use_svc && svc->high_source_sad_superframe))) {
4117     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
4118       vp9_set_quantizer(cpi, q);
4119       vp9_set_variance_partition_thresholds(cpi, q, 0);
4120     }
4121   }
4122
4123 #if !CONFIG_REALTIME_ONLY
4124   // Variance adaptive and in frame q adjustment experiments are mutually
4125   // exclusive.
4126   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
4127     vp9_vaq_frame_setup(cpi);
4128   } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
4129     vp9_360aq_frame_setup(cpi);
4130   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
4131     vp9_setup_in_frame_q_adj(cpi);
4132   } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
4133     // it may be pretty bad for rate-control,
4134     // and I should handle it somehow
4135     vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4136   } else {
4137 #endif
         // With CONFIG_REALTIME_ONLY only this cyclic-refresh / ROI selection
         // is compiled; otherwise it is the final else of the chain above.
4138     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4139       vp9_cyclic_refresh_setup(cpi);
4140     } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
4141       apply_roi_map(cpi);
4142     }
4143 #if !CONFIG_REALTIME_ONLY
4144   }
4145 #endif
4146
4147   apply_active_map(cpi);
4148
4149   vp9_encode_frame(cpi);
4150
4151   // Check if we should re-encode this frame at high Q because of high
4152   // overshoot based on the encoded frame size. Only for frames where
4153   // high temporal-source SAD is detected.
4154   // For SVC: all spatial layers are checked for re-encoding.
4155   if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
4156       (cpi->rc.high_source_sad ||
4157        (cpi->use_svc && svc->high_source_sad_superframe))) {
4158     int frame_size = 0;
4159     // Get an estimate of the encoded frame size from a trial pack; the
         // coding context is saved before the pack and restored after it.
4160     save_coding_context(cpi);
4161     vp9_pack_bitstream(cpi, dest, size);
4162     restore_coding_context(cpi);
         // *size is in bytes here; convert to bits.
4163     frame_size = (int)(*size) << 3;
4164     // Check if encoded frame will overshoot too much, and if so, set the q and
4165     // adjust some rate control parameters, and return to re-encode the frame.
4166     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
4167       vpx_clear_system_state();
4168       vp9_set_quantizer(cpi, q);
4169       vp9_set_variance_partition_thresholds(cpi, q, 0);
4170       suppress_active_map(cpi);
4171       // Turn-off cyclic refresh for re-encoded frame.
4172       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4173         CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
4174         unsigned char *const seg_map = cpi->segmentation_map;
4175         memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
4176         memset(cr->last_coded_q_map, MAXQ,
4177                cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
4178         cr->sb_index = 0;
4179         vp9_disable_segmentation(&cm->seg);
4180       }
4181       apply_active_map(cpi);
4182       vp9_encode_frame(cpi);
4183     }
4184   }
4185
4186   // Update some stats from cyclic refresh, and check for golden frame update.
4187   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
4188       !frame_is_intra_only(cm))
4189     vp9_cyclic_refresh_postencode(cpi);
4190
4191   // Update the skip mb flag probabilities based on the distribution
4192   // seen in the last encoder iteration.
4193   // update_base_skip_probs(cpi);
4194   vpx_clear_system_state();
4195   return 1;
4196 }
4197
4198 #if !CONFIG_REALTIME_ONLY
#define MAX_QSTEP_ADJ 4
// Returns the q adjustment step for a rate miss: rate_excess / rate_limit
// rounded to nearest (half of rate_limit is added before the truncating
// division), capped at MAX_QSTEP_ADJ.
//
// rate_excess: the larger of the two frame sizes being compared, in bits.
// rate_limit:  the smaller of the two; 0 yields MAX_QSTEP_ADJ.
//
// The sum is formed in 64 bits so that rate_excess + rate_limit / 2 cannot
// overflow int for large frame sizes.
static int get_qstep_adj(int rate_excess, int rate_limit) {
  int64_t qstep;
  if (rate_limit == 0) return MAX_QSTEP_ADJ;
  qstep = ((int64_t)rate_excess + rate_limit / 2) / rate_limit;
  return (int)(qstep < MAX_QSTEP_ADJ ? qstep : MAX_QSTEP_ADJ);
}
4205
4206 #if CONFIG_RATE_CTRL
#define RATE_CTRL_MAX_RECODE_NUM 7

// Record of the (q_index, frame size) pairs tried so far while recoding one
// frame, plus the q_index bracket derived from them.
typedef struct RATE_QINDEX_HISTORY {
  // Number of valid entries in q_index_history[] and rate_history[].
  int recode_count;
  // q_index used by each recorded attempt.
  int q_index_history[RATE_CTRL_MAX_RECODE_NUM];
  // Frame size in bits produced by each recorded attempt.
  int rate_history[RATE_CTRL_MAX_RECODE_NUM];
  // Most recent q_index whose frame size was <= the target (starts at 255).
  int q_index_high;
  // Most recent q_index whose frame size was >= the target (starts at 0).
  int q_index_low;
} RATE_QINDEX_HISTORY;

// Resets rq_history to "no attempts yet" with the widest bracket [0, 255].
// The history arrays are left untouched; recode_count == 0 marks them empty.
static void init_rq_history(RATE_QINDEX_HISTORY *rq_history) {
  rq_history->recode_count = 0;
  rq_history->q_index_high = 255;
  rq_history->q_index_low = 0;
}

// Appends one attempt (q_index produced actual_bits) to rq_history and
// tightens the bracket: q_index becomes the new high bound when the frame
// met or undershot target_bits, and the new low bound when it met or
// overshot it (both when it hit the target exactly).
//
// Once RATE_CTRL_MAX_RECODE_NUM attempts are stored the call is ignored, so
// the fixed-size arrays can never be written out of bounds.
static void update_rq_history(RATE_QINDEX_HISTORY *rq_history, int target_bits,
                              int actual_bits, int q_index) {
  const int slot = rq_history->recode_count;
  if (slot < 0 || slot >= RATE_CTRL_MAX_RECODE_NUM) return;

  rq_history->q_index_history[slot] = q_index;
  rq_history->rate_history[slot] = actual_bits;
  if (actual_bits <= target_bits) {
    rq_history->q_index_high = q_index;
  }
  if (actual_bits >= target_bits) {
    rq_history->q_index_low = q_index;
  }
  rq_history->recode_count = slot + 1;
}
4235
4236 static int guess_q_index_from_model(const RATE_QSTEP_MODEL *rq_model,
4237                                     int target_bits) {
4238   // The model predicts bits as follows.
4239   // target_bits = bias - ratio * log2(q_step)
4240   // Given the target_bits, we compute the q_step as follows.
4241   const double q_step =
4242       pow(2.0, (rq_model->bias - target_bits) / rq_model->ratio);
4243   // TODO(angiebird): Make this function support highbitdepth.
4244   return vp9_convert_q_to_qindex(q_step, VPX_BITS_8);
4245 }
4246
// Moves prev_q_index by a fixed gap towards the target frame size.
//
// If the last attempt undershot (actual_bits < target_bits) the index is
// lowered by gap and floored at 0; otherwise it is raised by gap and capped
// at 255. Only the side being moved towards is clamped.
static int guess_q_index_linear(int prev_q_index, int target_bits,
                                int actual_bits, int gap) {
  if (actual_bits < target_bits) {
    const int lowered = prev_q_index - gap;
    return lowered > 0 ? lowered : 0;
  } else {
    const int raised = prev_q_index + gap;
    return raised < 255 ? raised : 255;
  }
}
4259
// Returns |target_bits - actual_bits| as a percentage of target_bits.
//
// The difference is formed in double precision so that it cannot overflow
// int, and equal sizes always yield 0 (previously target == actual == 0
// produced NaN, which fails every threshold comparison in the callers).
// A zero target with a non-zero actual size still yields infinity.
static double get_bits_percent_diff(int target_bits, int actual_bits) {
  const double delta = (double)target_bits - (double)actual_bits;
  const double abs_delta = delta < 0 ? -delta : delta;
  if (abs_delta == 0.0) return 0.0;
  return abs_delta / target_bits * 100;
}
4264
4265 static int rq_model_predict_q_index(const RATE_QSTEP_MODEL *rq_model,
4266                                     const RATE_QINDEX_HISTORY *rq_history,
4267                                     int target_bits) {
4268   int q_index = 128;
4269   if (rq_history->recode_count > 0) {
4270     const int actual_bits =
4271         rq_history->rate_history[rq_history->recode_count - 1];
4272     const int prev_q_index =
4273         rq_history->q_index_history[rq_history->recode_count - 1];
4274     const double percent_diff = get_bits_percent_diff(target_bits, actual_bits);
4275     if (percent_diff > 50) {
4276       // Binary search.
4277       // When the actual_bits and target_bits are far apart, binary search
4278       // q_index is faster.
4279       q_index = (rq_history->q_index_low + rq_history->q_index_high) / 2;
4280     } else {
4281       if (rq_model->ready) {
4282         q_index = guess_q_index_from_model(rq_model, target_bits);
4283       } else {
4284         // TODO(angiebird): Find a better way to set the gap.
4285         q_index =
4286             guess_q_index_linear(prev_q_index, target_bits, actual_bits, 20);
4287       }
4288     }
4289   } else {
4290     if (rq_model->ready) {
4291       q_index = guess_q_index_from_model(rq_model, target_bits);
4292     }
4293   }
4294
4295   assert(rq_history->q_index_low <= rq_history->q_index_high);
4296   if (q_index <= rq_history->q_index_low) {
4297     q_index = rq_history->q_index_low + 1;
4298   }
4299   if (q_index >= rq_history->q_index_high) {
4300     q_index = rq_history->q_index_high - 1;
4301   }
4302   return q_index;
4303 }
4304
4305 static void rq_model_update(const RATE_QINDEX_HISTORY *rq_history,
4306                             int target_bits, RATE_QSTEP_MODEL *rq_model) {
4307   const int recode_count = rq_history->recode_count;
4308   if (recode_count >= 2) {
4309     // Fit the ratio and bias of rq_model based on last two recode histories.
4310     const double s1 = vp9_convert_qindex_to_q(
4311         rq_history->q_index_history[recode_count - 2], VPX_BITS_8);
4312     const double s2 = vp9_convert_qindex_to_q(
4313         rq_history->q_index_history[recode_count - 1], VPX_BITS_8);
4314     const double r1 = rq_history->rate_history[recode_count - 2];
4315     const double r2 = rq_history->rate_history[recode_count - 1];
4316     rq_model->ratio = (r2 - r1) / (log2(s1) - log2(s2));
4317     rq_model->bias = r1 + (rq_model->ratio) * log2(s1);
4318     rq_model->ready = 1;
4319   } else if (recode_count == 1) {
4320     if (rq_model->ready) {
4321       // Update the ratio only when the initial model exists and we only have
4322       // one recode history.
4323       const int prev_q = rq_history->q_index_history[recode_count - 1];
4324       const double prev_q_step = vp9_convert_qindex_to_q(prev_q, VPX_BITS_8);
4325       const int actual_bits = rq_history->rate_history[recode_count - 1];
4326       rq_model->ratio =
4327           rq_model->ratio + (target_bits - actual_bits) / log2(prev_q_step);
4328     }
4329   }
4330 }
4331 #endif  // CONFIG_RATE_CTRL
4332
4333 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
4334                                     uint8_t *dest) {
4335   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4336   VP9_COMMON *const cm = &cpi->common;
4337   RATE_CONTROL *const rc = &cpi->rc;
4338   int bottom_index, top_index;
4339   int loop_count = 0;
4340   int loop_at_this_size = 0;
4341   int loop = 0;
4342   int overshoot_seen = 0;
4343   int undershoot_seen = 0;
4344   int frame_over_shoot_limit;
4345   int frame_under_shoot_limit;
4346   int q = 0, q_low = 0, q_high = 0;
4347   int enable_acl;
4348 #ifdef AGGRESSIVE_VBR
4349   int qrange_adj = 1;
4350 #endif
4351
4352 #if CONFIG_RATE_CTRL
4353   const FRAME_UPDATE_TYPE update_type =
4354       cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
4355   const ENCODE_FRAME_TYPE frame_type = get_encode_frame_type(update_type);
4356   RATE_QSTEP_MODEL *rq_model = &cpi->rq_model[frame_type];
4357   RATE_QINDEX_HISTORY rq_history;
4358   init_rq_history(&rq_history);
4359 #endif  // CONFIG_RATE_CTRL
4360
4361   if (cm->show_existing_frame) {
4362     rc->this_frame_target = 0;
4363     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
4364     return;
4365   }
4366
4367   set_size_independent_vars(cpi);
4368
4369   enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
4370                                        (cpi->twopass.gf_group.index == 1)
4371                                  : 0;
4372
4373   do {
4374     vpx_clear_system_state();
4375
4376     set_frame_size(cpi);
4377
4378     if (loop_count == 0 || cpi->resize_pending != 0) {
4379       set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4380
4381 #ifdef AGGRESSIVE_VBR
4382       if (two_pass_first_group_inter(cpi)) {
4383         // Adjustment limits for min and max q
4384         qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2);
4385
4386         bottom_index =
4387             VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q);
4388         top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2);
4389       }
4390 #endif
4391       // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
4392       set_mv_search_params(cpi);
4393
4394       // Reset the loop state for new frame size.
4395       overshoot_seen = 0;
4396       undershoot_seen = 0;
4397
4398       // Reconfiguration for change in frame size has concluded.
4399       cpi->resize_pending = 0;
4400
4401       q_low = bottom_index;
4402       q_high = top_index;
4403
4404       loop_at_this_size = 0;
4405     }
4406
4407 #if CONFIG_RATE_CTRL
4408     if (cpi->encode_command.use_external_target_frame_bits) {
4409       q = rq_model_predict_q_index(rq_model, &rq_history,
4410                                    rc->this_frame_target);
4411     }
4412 #endif  // CONFIG_RATE_CTRL
4413     // Decide frame size bounds first time through.
4414     if (loop_count == 0) {
4415       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
4416                                        &frame_under_shoot_limit,
4417                                        &frame_over_shoot_limit);
4418     }
4419
4420     cpi->Source =
4421         vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
4422                               (oxcf->pass == 0), EIGHTTAP, 0);
4423
4424     // Unfiltered raw source used in metrics calculation if the source
4425     // has been filtered.
4426     if (is_psnr_calc_enabled(cpi)) {
4427 #ifdef ENABLE_KF_DENOISE
4428       if (is_spatial_denoise_enabled(cpi)) {
4429         cpi->raw_source_frame = vp9_scale_if_required(
4430             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
4431             (oxcf->pass == 0), EIGHTTAP, 0);
4432       } else {
4433         cpi->raw_source_frame = cpi->Source;
4434       }
4435 #else
4436       cpi->raw_source_frame = cpi->Source;
4437 #endif
4438     }
4439
4440     if (cpi->unscaled_last_source != NULL)
4441       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
4442                                                &cpi->scaled_last_source,
4443                                                (oxcf->pass == 0), EIGHTTAP, 0);
4444
4445     if (frame_is_intra_only(cm) == 0) {
4446       if (loop_count > 0) {
4447         release_scaled_references(cpi);
4448       }
4449       vp9_scale_references(cpi);
4450     }
4451
4452 #if CONFIG_RATE_CTRL
4453     // TODO(angiebird): This is a hack for making sure the encoder use the
4454     // external_quantize_index exactly. Avoid this kind of hack later.
4455     if (cpi->encode_command.use_external_quantize_index) {
4456       q = cpi->encode_command.external_quantize_index;
4457     }
4458 #endif
4459
4460     vp9_set_quantizer(cpi, q);
4461
4462     if (loop_count == 0) setup_frame(cpi);
4463
4464     // Variance adaptive and in frame q adjustment experiments are mutually
4465     // exclusive.
4466     if (oxcf->aq_mode == VARIANCE_AQ) {
4467       vp9_vaq_frame_setup(cpi);
4468     } else if (oxcf->aq_mode == EQUATOR360_AQ) {
4469       vp9_360aq_frame_setup(cpi);
4470     } else if (oxcf->aq_mode == COMPLEXITY_AQ) {
4471       vp9_setup_in_frame_q_adj(cpi);
4472     } else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
4473       vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4474     } else if (oxcf->aq_mode == PSNR_AQ) {
4475       vp9_psnr_aq_mode_setup(&cm->seg);
4476     }
4477
4478     vp9_encode_frame(cpi);
4479
4480     // Update the skip mb flag probabilities based on the distribution
4481     // seen in the last encoder iteration.
4482     // update_base_skip_probs(cpi);
4483
4484     vpx_clear_system_state();
4485
4486     // Dummy pack of the bitstream using up to date stats to get an
4487     // accurate estimate of output frame size to determine if we need
4488     // to recode.
4489     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4490       save_coding_context(cpi);
4491       if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
4492
4493       rc->projected_frame_size = (int)(*size) << 3;
4494
4495       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
4496     }
4497
4498 #if CONFIG_RATE_CTRL
4499     // This part needs to be after save_coding_context() because
4500     // restore_coding_context will be called in the end of this function.
4501     // TODO(angiebird): This is a hack for making sure the encoder use the
4502     // external_quantize_index exactly. Avoid this kind of hack later.
4503     if (cpi->encode_command.use_external_quantize_index) {
4504       break;
4505     }
4506
4507     if (cpi->encode_command.use_external_target_frame_bits) {
4508       const double percent_diff = get_bits_percent_diff(
4509           rc->this_frame_target, rc->projected_frame_size);
4510       update_rq_history(&rq_history, rc->this_frame_target,
4511                         rc->projected_frame_size, q);
4512       loop_count += 1;
4513
4514       rq_model_update(&rq_history, rc->this_frame_target, rq_model);
4515
4516       // Check if we hit the target bitrate.
4517       if (percent_diff <= 15 ||
4518           rq_history.recode_count >= RATE_CTRL_MAX_RECODE_NUM ||
4519           rq_history.q_index_low >= rq_history.q_index_high) {
4520         break;
4521       }
4522
4523       loop = 1;
4524       restore_coding_context(cpi);
4525       continue;
4526     }
4527 #endif  // CONFIG_RATE_CTRL
4528
4529     if (oxcf->rc_mode == VPX_Q) {
4530       loop = 0;
4531     } else {
4532       if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
4533           (rc->projected_frame_size < rc->max_frame_bandwidth)) {
4534         int last_q = q;
4535         int64_t kf_err;
4536
4537         int64_t high_err_target = cpi->ambient_err;
4538         int64_t low_err_target = cpi->ambient_err >> 1;
4539
4540 #if CONFIG_VP9_HIGHBITDEPTH
4541         if (cm->use_highbitdepth) {
4542           kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4543         } else {
4544           kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4545         }
4546 #else
4547         kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4548 #endif  // CONFIG_VP9_HIGHBITDEPTH
4549
4550         // Prevent possible divide by zero error below for perfect KF
4551         kf_err += !kf_err;
4552
4553         // The key frame is not good enough or we can afford
4554         // to make it better without undue risk of popping.
4555         if ((kf_err > high_err_target &&
4556              rc->projected_frame_size <= frame_over_shoot_limit) ||
4557             (kf_err > low_err_target &&
4558              rc->projected_frame_size <= frame_under_shoot_limit)) {
4559           // Lower q_high
4560           q_high = q > q_low ? q - 1 : q_low;
4561
4562           // Adjust Q
4563           q = (int)((q * high_err_target) / kf_err);
4564           q = VPXMIN(q, (q_high + q_low) >> 1);
4565         } else if (kf_err < low_err_target &&
4566                    rc->projected_frame_size >= frame_under_shoot_limit) {
4567           // The key frame is much better than the previous frame
4568           // Raise q_low
4569           q_low = q < q_high ? q + 1 : q_high;
4570
4571           // Adjust Q
4572           q = (int)((q * low_err_target) / kf_err);
4573           q = VPXMIN(q, (q_high + q_low + 1) >> 1);
4574         }
4575
4576         // Clamp Q to upper and lower limits:
4577         q = clamp(q, q_low, q_high);
4578
4579         loop = q != last_q;
4580       } else if (recode_loop_test(cpi, frame_over_shoot_limit,
4581                                   frame_under_shoot_limit, q,
4582                                   VPXMAX(q_high, top_index), bottom_index)) {
4583         // Is the projected frame size out of range and are we allowed
4584         // to attempt to recode.
4585         int last_q = q;
4586         int retries = 0;
4587         int qstep;
4588
4589         if (cpi->resize_pending == 1) {
4590           // Change in frame size so go back around the recode loop.
4591           cpi->rc.frame_size_selector =
4592               SCALE_STEP1 - cpi->rc.frame_size_selector;
4593           cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
4594
4595 #if CONFIG_INTERNAL_STATS
4596           ++cpi->tot_recode_hits;
4597 #endif
4598           ++loop_count;
4599           loop = 1;
4600           continue;
4601         }
4602
4603         // Frame size out of permitted range:
4604         // Update correction factor & compute new Q to try...
4605
4606         // Frame is too large
4607         if (rc->projected_frame_size > rc->this_frame_target) {
4608           // Special case if the projected size is > the max allowed.
4609           if ((q == q_high) &&
4610               ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
4611                (!rc->is_src_frame_alt_ref &&
4612                 (rc->projected_frame_size >=
4613                  big_rate_miss_high_threshold(cpi))))) {
4614             int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
4615                                             big_rate_miss_high_threshold(cpi)));
4616             double q_val_high;
4617             q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
4618             q_val_high =
4619                 q_val_high * ((double)rc->projected_frame_size / max_rate);
4620             q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
4621             q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
4622           }
4623
4624           // Raise Qlow as to at least the current value
4625           qstep =
4626               get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
4627           q_low = VPXMIN(q + qstep, q_high);
4628
4629           if (undershoot_seen || loop_at_this_size > 1) {
4630             // Update rate_correction_factor unless
4631             vp9_rc_update_rate_correction_factors(cpi);
4632
4633             q = (q_high + q_low + 1) / 2;
4634           } else {
4635             // Update rate_correction_factor unless
4636             vp9_rc_update_rate_correction_factors(cpi);
4637
4638             q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4639                                   VPXMAX(q_high, top_index));
4640
4641             while (q < q_low && retries < 10) {
4642               vp9_rc_update_rate_correction_factors(cpi);
4643               q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4644                                     VPXMAX(q_high, top_index));
4645               retries++;
4646             }
4647           }
4648
4649           overshoot_seen = 1;
4650         } else {
4651           // Frame is too small
4652           qstep =
4653               get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
4654           q_high = VPXMAX(q - qstep, q_low);
4655
4656           if (overshoot_seen || loop_at_this_size > 1) {
4657             vp9_rc_update_rate_correction_factors(cpi);
4658             q = (q_high + q_low) / 2;
4659           } else {
4660             vp9_rc_update_rate_correction_factors(cpi);
4661             q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4662                                   VPXMIN(q_low, bottom_index), top_index);
4663             // Special case reset for qlow for constrained quality.
4664             // This should only trigger where there is very substantial
4665             // undershoot on a frame and the auto cq level is above
4666             // the user passed in value.
4667             if (oxcf->rc_mode == VPX_CQ && q < q_low) {
4668               q_low = q;
4669             }
4670
4671             while (q > q_high && retries < 10) {
4672               vp9_rc_update_rate_correction_factors(cpi);
4673               q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4674                                     VPXMIN(q_low, bottom_index), top_index);
4675               retries++;
4676             }
4677           }
4678           undershoot_seen = 1;
4679         }
4680
4681         // Clamp Q to upper and lower limits:
4682         q = clamp(q, q_low, q_high);
4683
4684         loop = (q != last_q);
4685       } else {
4686         loop = 0;
4687       }
4688     }
4689
4690     // Special case for overlay frame.
4691     if (rc->is_src_frame_alt_ref &&
4692         rc->projected_frame_size < rc->max_frame_bandwidth)
4693       loop = 0;
4694
4695     if (loop) {
4696       ++loop_count;
4697       ++loop_at_this_size;
4698
4699 #if CONFIG_INTERNAL_STATS
4700       ++cpi->tot_recode_hits;
4701 #endif
4702     }
4703
4704     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
4705       if (loop) restore_coding_context(cpi);
4706   } while (loop);
4707
4708 #ifdef AGGRESSIVE_VBR
4709   if (two_pass_first_group_inter(cpi)) {
4710     cpi->twopass.active_worst_quality =
4711         VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
4712   } else if (!frame_is_kf_gf_arf(cpi)) {
4713 #else
4714   if (!frame_is_kf_gf_arf(cpi)) {
4715 #endif
4716     // Have we been forced to adapt Q outside the expected range by an extreme
4717     // rate miss. If so adjust the active maxQ for the subsequent frames.
4718     if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
4719       cpi->twopass.active_worst_quality = q;
4720     } else if (oxcf->vbr_corpus_complexity && q == q_low &&
4721                rc->projected_frame_size < rc->this_frame_target) {
4722       cpi->twopass.active_worst_quality =
4723           VPXMAX(q, cpi->twopass.active_worst_quality - 1);
4724     }
4725   }
4726
4727   if (enable_acl) {
4728     // Skip recoding, if model diff is below threshold
4729     const int thresh = compute_context_model_thresh(cpi);
4730     const int diff = compute_context_model_diff(cm);
4731     if (diff >= thresh) {
4732       vp9_encode_frame(cpi);
4733     }
4734   }
4735   if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4736     vpx_clear_system_state();
4737     restore_coding_context(cpi);
4738   }
4739 }
4740 #endif  // !CONFIG_REALTIME_ONLY
4741
4742 static int get_ref_frame_flags(const VP9_COMP *cpi) {
4743   const int *const map = cpi->common.ref_frame_map;
4744   const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
4745   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
4746   const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
4747   int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
4748
4749   if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
4750
4751   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
4752       (cpi->svc.number_temporal_layers == 1 &&
4753        cpi->svc.number_spatial_layers == 1))
4754     flags &= ~VP9_GOLD_FLAG;
4755
4756   if (alt_is_last) flags &= ~VP9_ALT_FLAG;
4757
4758   if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
4759
4760   return flags;
4761 }
4762
4763 static void set_ext_overrides(VP9_COMP *cpi) {
4764   // Overrides the defaults with the externally supplied values with
4765   // vp9_update_reference() and vp9_update_entropy() calls
4766   // Note: The overrides are valid only for the next frame passed
4767   // to encode_frame_to_data_rate() function
4768   if (cpi->ext_refresh_frame_context_pending) {
4769     cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
4770     cpi->ext_refresh_frame_context_pending = 0;
4771   }
4772   if (cpi->ext_refresh_frame_flags_pending) {
4773     cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
4774     cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
4775     cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
4776   }
4777 }
4778
4779 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
4780     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4781     YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
4782     int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) {
4783   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4784       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4785 #if CONFIG_VP9_HIGHBITDEPTH
4786     if (cm->bit_depth == VPX_BITS_8) {
4787       vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4788                                  phase_scaler2);
4789       vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type,
4790                                  phase_scaler);
4791     } else {
4792       scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
4793                              filter_type2, phase_scaler2);
4794       scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
4795                              filter_type, phase_scaler);
4796     }
4797 #else
4798     vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4799                                phase_scaler2);
4800     vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler);
4801 #endif  // CONFIG_VP9_HIGHBITDEPTH
4802     return scaled;
4803   } else {
4804     return unscaled;
4805   }
4806 }
4807
4808 YV12_BUFFER_CONFIG *vp9_scale_if_required(
4809     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4810     int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) {
4811   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4812       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4813 #if CONFIG_VP9_HIGHBITDEPTH
4814     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4815         unscaled->y_height <= (scaled->y_height << 1))
4816       if (cm->bit_depth == VPX_BITS_8)
4817         vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4818       else
4819         scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
4820                                filter_type, phase_scaler);
4821     else
4822       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
4823 #else
4824     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4825         unscaled->y_height <= (scaled->y_height << 1))
4826       vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4827     else
4828       scale_and_extend_frame_nonnormative(unscaled, scaled);
4829 #endif  // CONFIG_VP9_HIGHBITDEPTH
4830     return scaled;
4831   } else {
4832     return unscaled;
4833   }
4834 }
4835
4836 static void set_ref_sign_bias(VP9_COMP *cpi) {
4837   VP9_COMMON *const cm = &cpi->common;
4838   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4839   const int cur_frame_index = ref_buffer->frame_index;
4840   MV_REFERENCE_FRAME ref_frame;
4841
4842   for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
4843     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
4844     const RefCntBuffer *const ref_cnt_buf =
4845         get_ref_cnt_buffer(&cpi->common, buf_idx);
4846     if (ref_cnt_buf) {
4847       cm->ref_frame_sign_bias[ref_frame] =
4848           cur_frame_index < ref_cnt_buf->frame_index;
4849     }
4850   }
4851 }
4852
4853 static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
4854   INTERP_FILTER ifilter;
4855   int ref_total[MAX_REF_FRAMES] = { 0 };
4856   MV_REFERENCE_FRAME ref;
4857   int mask = 0;
4858   if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
4859     return mask;
4860   for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
4861     for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
4862       ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
4863
4864   for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
4865     if ((ref_total[LAST_FRAME] &&
4866          cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
4867         (ref_total[GOLDEN_FRAME] == 0 ||
4868          cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
4869              ref_total[GOLDEN_FRAME]) &&
4870         (ref_total[ALTREF_FRAME] == 0 ||
4871          cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
4872              ref_total[ALTREF_FRAME]))
4873       mask |= 1 << ifilter;
4874   }
4875   return mask;
4876 }
4877
4878 #ifdef ENABLE_KF_DENOISE
// Baseline kernel weights for denoise, stored row-major.
// 3x3 kernel: centre weight 4, weights sum to 16.
static uint8_t dn_kernal_3[9] = {
  1, 2, 1,  //
  2, 4, 2,  //
  1, 2, 1
};
// 5x5 kernel: centre weight 4, weights sum to 32.
static uint8_t dn_kernal_5[25] = {
  1, 1, 1, 1, 1,  //
  1, 1, 2, 1, 1,  //
  1, 2, 4, 2, 1,  //
  1, 1, 2, 1, 1,  //
  1, 1, 1, 1, 1
};
4883
4884 static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
4885                                      uint8_t point_weight, int *sum_val,
4886                                      int *sum_weight) {
4887   if (abs(centre_val - data_val) <= thresh) {
4888     *sum_weight += point_weight;
4889     *sum_val += (int)data_val * (int)point_weight;
4890   }
4891 }
4892
4893 static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
4894                                   const int strength) {
4895   int sum_weight = 0;
4896   int sum_val = 0;
4897   int thresh = strength;
4898   int kernal_size = 5;
4899   int half_k_size = 2;
4900   int i, j;
4901   int max_diff = 0;
4902   uint8_t *tmp_ptr;
4903   uint8_t *kernal_ptr;
4904
4905   // Find the maximum deviation from the source point in the locale.
4906   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4907   for (i = 0; i < kernal_size + 2; ++i) {
4908     for (j = 0; j < kernal_size + 2; ++j) {
4909       max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
4910     }
4911     tmp_ptr += stride;
4912   }
4913
4914   // Select the kernel size.
4915   if (max_diff > (strength + (strength >> 1))) {
4916     kernal_size = 3;
4917     half_k_size = 1;
4918     thresh = thresh >> 1;
4919   }
4920   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4921
4922   // Apply the kernel
4923   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4924   for (i = 0; i < kernal_size; ++i) {
4925     for (j = 0; j < kernal_size; ++j) {
4926       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4927                         &sum_val, &sum_weight);
4928       ++kernal_ptr;
4929     }
4930     tmp_ptr += stride;
4931   }
4932
4933   // Update the source value with the new filtered value
4934   *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4935 }
4936
4937 #if CONFIG_VP9_HIGHBITDEPTH
4938 static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
4939                                          const int strength) {
4940   int sum_weight = 0;
4941   int sum_val = 0;
4942   int thresh = strength;
4943   int kernal_size = 5;
4944   int half_k_size = 2;
4945   int i, j;
4946   int max_diff = 0;
4947   uint16_t *tmp_ptr;
4948   uint8_t *kernal_ptr;
4949
4950   // Find the maximum deviation from the source point in the locale.
4951   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4952   for (i = 0; i < kernal_size + 2; ++i) {
4953     for (j = 0; j < kernal_size + 2; ++j) {
4954       max_diff = VPXMAX(max_diff, abs((int)src_ptr - (int)tmp_ptr[j]));
4955     }
4956     tmp_ptr += stride;
4957   }
4958
4959   // Select the kernel size.
4960   if (max_diff > (strength + (strength >> 1))) {
4961     kernal_size = 3;
4962     half_k_size = 1;
4963     thresh = thresh >> 1;
4964   }
4965   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4966
4967   // Apply the kernel
4968   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4969   for (i = 0; i < kernal_size; ++i) {
4970     for (j = 0; j < kernal_size; ++j) {
4971       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4972                         &sum_val, &sum_weight);
4973       ++kernal_ptr;
4974     }
4975     tmp_ptr += stride;
4976   }
4977
4978   // Update the source value with the new filtered value
4979   *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4980 }
4981 #endif  // CONFIG_VP9_HIGHBITDEPTH
4982
4983 // Apply thresholded spatial noise suppression to a given buffer.
4984 static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
4985                                    const int stride, const int width,
4986                                    const int height, const int strength) {
4987   VP9_COMMON *const cm = &cpi->common;
4988   uint8_t *src_ptr = buffer;
4989   int row;
4990   int col;
4991
4992   for (row = 0; row < height; ++row) {
4993     for (col = 0; col < width; ++col) {
4994 #if CONFIG_VP9_HIGHBITDEPTH
4995       if (cm->use_highbitdepth)
4996         highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]), stride,
4997                                      strength);
4998       else
4999         spatial_denoise_point(&src_ptr[col], stride, strength);
5000 #else
5001       spatial_denoise_point(&src_ptr[col], stride, strength);
5002 #endif  // CONFIG_VP9_HIGHBITDEPTH
5003     }
5004     src_ptr += stride;
5005   }
5006 }
5007
5008 // Apply thresholded spatial noise suppression to source.
5009 static void spatial_denoise_frame(VP9_COMP *cpi) {
5010   YV12_BUFFER_CONFIG *src = cpi->Source;
5011   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5012   TWO_PASS *const twopass = &cpi->twopass;
5013   VP9_COMMON *const cm = &cpi->common;
5014
5015   // Base the filter strength on the current active max Q.
5016   const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
5017                                               cm->bit_depth));
5018   int strength =
5019       VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));
5020
5021   // Denoise each of Y,U and V buffers.
5022   spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
5023                          src->y_height, strength);
5024
5025   strength += (strength >> 1);
5026   spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
5027                          src->uv_height, strength << 1);
5028
5029   spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
5030                          src->uv_height, strength << 1);
5031 }
5032 #endif  // ENABLE_KF_DENOISE
5033
5034 #if !CONFIG_REALTIME_ONLY
5035 static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
5036                                          uint8_t *dest) {
5037   if (cpi->common.seg.enabled)
5038     if (ALT_REF_AQ_PROTECT_GAIN) {
5039       size_t nsize = *size;
5040       int overhead;
5041
5042       // TODO(yuryg): optimize this, as
5043       // we don't really need to repack
5044
5045       save_coding_context(cpi);
5046       vp9_disable_segmentation(&cpi->common.seg);
5047       vp9_pack_bitstream(cpi, dest, &nsize);
5048       restore_coding_context(cpi);
5049
5050       overhead = (int)*size - (int)nsize;
5051
5052       if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
5053         vp9_encode_frame(cpi);
5054       else
5055         vp9_enable_segmentation(&cpi->common.seg);
5056     }
5057 }
5058 #endif
5059
5060 static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
5061   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
5062
5063   if (ref_buffer) {
5064     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5065     ref_buffer->frame_index =
5066         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
5067 #if CONFIG_RATE_CTRL
5068     ref_buffer->frame_coding_index = cm->current_frame_coding_index;
5069 #endif  // CONFIG_RATE_CTRL
5070   }
5071 }
5072
5073 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
5074   VP9_COMMON *cm = &cpi->common;
5075   ThreadData *td = &cpi->td;
5076   MACROBLOCK *x = &td->mb;
5077   MACROBLOCKD *xd = &x->e_mbd;
5078   uint8_t *y_buffer = cpi->Source->y_buffer;
5079   const int y_stride = cpi->Source->y_stride;
5080   const int block_size = BLOCK_16X16;
5081
5082   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
5083   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
5084   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
5085   const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
5086   double log_sum = 0.0;
5087   int row, col;
5088
5089   // Loop through each 64x64 block.
5090   for (row = 0; row < num_rows; ++row) {
5091     for (col = 0; col < num_cols; ++col) {
5092       int mi_row, mi_col;
5093       double var = 0.0, num_of_var = 0.0;
5094       const int index = row * num_cols + col;
5095
5096       for (mi_row = row * num_8x8_h;
5097            mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
5098         for (mi_col = col * num_8x8_w;
5099              mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w; ++mi_col) {
5100           struct buf_2d buf;
5101           const int row_offset_y = mi_row << 3;
5102           const int col_offset_y = mi_col << 3;
5103
5104           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
5105           buf.stride = y_stride;
5106
5107           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
5108           // and high bit videos, the variance needs to be divided by 2.0 or
5109           // 64.0 separately.
5110           // TODO(sdeng): need to tune for 12bit videos.
5111 #if CONFIG_VP9_HIGHBITDEPTH
5112           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
5113             var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
5114           else
5115 #endif
5116             var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
5117
5118           num_of_var += 1.0;
5119         }
5120       }
5121       var = var / num_of_var / 64.0;
5122
5123       // Curve fitting with an exponential model on all 16x16 blocks from the
5124       // Midres dataset.
5125       var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
5126       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
5127       log_sum += log(var);
5128     }
5129   }
5130   log_sum = exp(log_sum / (double)(num_rows * num_cols));
5131
5132   for (row = 0; row < num_rows; ++row) {
5133     for (col = 0; col < num_cols; ++col) {
5134       const int index = row * num_cols + col;
5135       cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
5136     }
5137   }
5138
5139   (void)xd;
5140 }
5141
// Process the wiener variance in 16x16 block basis.
//
// qsort() comparator for ascending order. Both elements are read as int.
// Returns 1 if *elem1 > *elem2, -1 if *elem1 < *elem2 and 0 when equal.
static int qsort_comp(const void *elem1, const void *elem2) {
  const int lhs = *(const int *)elem1;
  const int rhs = *(const int *)elem2;
  return (lhs > rhs) - (lhs < rhs);
}
5150
5151 static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
5152   VP9_COMMON *cm = &cpi->common;
5153
5154   if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows &&
5155       cpi->mb_wiener_var_cols >= cm->mb_cols)
5156     return;
5157
5158   vpx_free(cpi->mb_wiener_variance);
5159   cpi->mb_wiener_variance = NULL;
5160
5161   CHECK_MEM_ERROR(
5162       cm, cpi->mb_wiener_variance,
5163       vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
5164   cpi->mb_wiener_var_rows = cm->mb_rows;
5165   cpi->mb_wiener_var_cols = cm->mb_cols;
5166 }
5167
5168 static void set_mb_wiener_variance(VP9_COMP *cpi) {
5169   VP9_COMMON *cm = &cpi->common;
5170   uint8_t *buffer = cpi->Source->y_buffer;
5171   int buf_stride = cpi->Source->y_stride;
5172
5173 #if CONFIG_VP9_HIGHBITDEPTH
5174   ThreadData *td = &cpi->td;
5175   MACROBLOCK *x = &td->mb;
5176   MACROBLOCKD *xd = &x->e_mbd;
5177   DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
5178   DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
5179   uint8_t *zero_pred;
5180 #else
5181   DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
5182 #endif
5183
5184   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
5185   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
5186
5187   int mb_row, mb_col, count = 0;
5188   // Hard coded operating block size
5189   const int block_size = 16;
5190   const int coeff_count = block_size * block_size;
5191   const TX_SIZE tx_size = TX_16X16;
5192
5193 #if CONFIG_VP9_HIGHBITDEPTH
5194   xd->cur_buf = cpi->Source;
5195   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5196     zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
5197     memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);
5198   } else {
5199     zero_pred = zero_pred8;
5200     memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);
5201   }
5202 #else
5203   memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
5204 #endif
5205
5206   cpi->norm_wiener_variance = 0;
5207
5208   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
5209     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
5210       int idx;
5211       int16_t median_val = 0;
5212       uint8_t *mb_buffer =
5213           buffer + mb_row * block_size * buf_stride + mb_col * block_size;
5214       int64_t wiener_variance = 0;
5215
5216 #if CONFIG_VP9_HIGHBITDEPTH
5217       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5218         vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
5219                                   mb_buffer, buf_stride, zero_pred, block_size,
5220                                   xd->bd);
5221         highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5222       } else {
5223         vpx_subtract_block(block_size, block_size, src_diff, block_size,
5224                            mb_buffer, buf_stride, zero_pred, block_size);
5225         wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5226       }
5227 #else
5228       vpx_subtract_block(block_size, block_size, src_diff, block_size,
5229                          mb_buffer, buf_stride, zero_pred, block_size);
5230       wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5231 #endif  // CONFIG_VP9_HIGHBITDEPTH
5232
5233       coeff[0] = 0;
5234       for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]);
5235
5236       qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp);
5237
5238       // Noise level estimation
5239       median_val = coeff[coeff_count / 2];
5240
5241       // Wiener filter
5242       for (idx = 1; idx < coeff_count; ++idx) {
5243         int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
5244         int64_t tmp_coeff = (int64_t)coeff[idx];
5245         if (median_val) {
5246           tmp_coeff = (sqr_coeff * coeff[idx]) /
5247                       (sqr_coeff + (int64_t)median_val * median_val);
5248         }
5249         wiener_variance += tmp_coeff * tmp_coeff;
5250       }
5251       cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
5252           wiener_variance / coeff_count;
5253       cpi->norm_wiener_variance +=
5254           cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col];
5255       ++count;
5256     }
5257   }
5258
5259   if (count) cpi->norm_wiener_variance /= count;
5260   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
5261 }
5262
5263 #if !CONFIG_REALTIME_ONLY
5264 static void update_encode_frame_result(
5265     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
5266     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
5267     RefCntBuffer *ref_frame_buf[MAX_INTER_REF_FRAMES], int quantize_index,
5268     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
5269 #if CONFIG_RATE_CTRL
5270     const PARTITION_INFO *partition_info,
5271     const MOTION_VECTOR_INFO *motion_vector_info,
5272 #endif  // CONFIG_RATE_CTRL
5273     ENCODE_FRAME_RESULT *encode_frame_result);
5274 #endif  // !CONFIG_REALTIME_ONLY
5275
5276 static void encode_frame_to_data_rate(
5277     VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags,
5278     ENCODE_FRAME_RESULT *encode_frame_result) {
5279   VP9_COMMON *const cm = &cpi->common;
5280   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5281   struct segmentation *const seg = &cm->seg;
5282   TX_SIZE t;
5283
5284   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
5285   // No need to set svc.skip_enhancement_layer if whole superframe will be
5286   // dropped.
5287   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
5288       cpi->oxcf.target_bandwidth == 0 &&
5289       !(cpi->svc.framedrop_mode != LAYER_DROP &&
5290         (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
5291          cpi->svc
5292              .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
5293                                                 1]) &&
5294         cpi->svc.drop_spatial_layer[0])) {
5295     cpi->svc.skip_enhancement_layer = 1;
5296     vp9_rc_postencode_update_drop_frame(cpi);
5297     cpi->ext_refresh_frame_flags_pending = 0;
5298     cpi->last_frame_dropped = 1;
5299     cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
5300     cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
5301     vp9_inc_frame_in_layer(cpi);
5302     return;
5303   }
5304
5305   set_ext_overrides(cpi);
5306   vpx_clear_system_state();
5307
5308 #ifdef ENABLE_KF_DENOISE
5309   // Spatial denoise of key frame.
5310   if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
5311 #endif
5312
5313   if (cm->show_existing_frame == 0) {
5314     // Update frame index
5315     set_frame_index(cpi, cm);
5316
5317     // Set the arf sign bias for this frame.
5318     set_ref_sign_bias(cpi);
5319   }
5320
5321   // Set default state for segment based loop filter update flags.
5322   cm->lf.mode_ref_delta_update = 0;
5323
5324   if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
5325     cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);
5326
5327   // Set various flags etc to special state if it is a key frame.
5328   if (frame_is_intra_only(cm)) {
5329     // Reset the loop filter deltas and segmentation map.
5330     vp9_reset_segment_features(&cm->seg);
5331
5332     // If segmentation is enabled force a map update for key frames.
5333     if (seg->enabled) {
5334       seg->update_map = 1;
5335       seg->update_data = 1;
5336     }
5337
5338     // The alternate reference frame cannot be active for a key frame.
5339     cpi->rc.source_alt_ref_active = 0;
5340
5341     cm->error_resilient_mode = oxcf->error_resilient_mode;
5342     cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
5343
5344     // By default, encoder assumes decoder can use prev_mi.
5345     if (cm->error_resilient_mode) {
5346       cm->frame_parallel_decoding_mode = 1;
5347       cm->reset_frame_context = 0;
5348       cm->refresh_frame_context = 0;
5349     } else if (cm->intra_only) {
5350       // Only reset the current context.
5351       cm->reset_frame_context = 2;
5352     }
5353   }
5354
5355   if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);
5356
5357   if (oxcf->aq_mode == PERCEPTUAL_AQ) {
5358     init_mb_wiener_var_buffer(cpi);
5359     set_mb_wiener_variance(cpi);
5360   }
5361
5362   vpx_clear_system_state();
5363
5364 #if CONFIG_INTERNAL_STATS
5365   memset(cpi->mode_chosen_counts, 0,
5366          MAX_MODES * sizeof(*cpi->mode_chosen_counts));
5367 #endif
5368 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5369   // Backup to ensure consistency between recodes
5370   save_encode_params(cpi);
5371 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5372
5373   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
5374     if (!encode_without_recode_loop(cpi, size, dest)) return;
5375   } else {
5376 #if !CONFIG_REALTIME_ONLY
5377     encode_with_recode_loop(cpi, size, dest);
5378 #endif
5379   }
5380
5381   // TODO(jingning): When using show existing frame mode, we assume that the
5382   // current ARF will be directly used as the final reconstructed frame. This is
5383   // an encoder control scheme. One could in principle explore other
5384   // possibilities to arrange the reference frame buffer and their coding order.
5385   if (cm->show_existing_frame) {
5386     ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
5387                cm->ref_frame_map[cpi->alt_fb_idx]);
5388   }
5389
5390 #if !CONFIG_REALTIME_ONLY
5391   // Disable segmentation if it decrease rate/distortion ratio
5392   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5393     vp9_try_disable_lookahead_aq(cpi, size, dest);
5394 #endif
5395
5396 #if CONFIG_VP9_TEMPORAL_DENOISING
5397 #ifdef OUTPUT_YUV_DENOISED
5398   if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
5399     vpx_write_yuv_frame(yuv_denoised_file,
5400                         &cpi->denoiser.running_avg_y[INTRA_FRAME]);
5401   }
5402 #endif
5403 #endif
5404 #ifdef OUTPUT_YUV_SKINMAP
5405   if (cpi->common.current_video_frame > 1) {
5406     vp9_output_skin_map(cpi, yuv_skinmap_file);
5407   }
5408 #endif
5409
5410   // Special case code to reduce pulsing when key frames are forced at a
5411   // fixed interval. Note the reconstruction error if it is the frame before
5412   // the force key frame
5413   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
5414 #if CONFIG_VP9_HIGHBITDEPTH
5415     if (cm->use_highbitdepth) {
5416       cpi->ambient_err =
5417           vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5418     } else {
5419       cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5420     }
5421 #else
5422     cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5423 #endif  // CONFIG_VP9_HIGHBITDEPTH
5424   }
5425
5426   // If the encoder forced a KEY_FRAME decision
5427   if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;
5428
5429   cm->frame_to_show = get_frame_new_buffer(cm);
5430   cm->frame_to_show->color_space = cm->color_space;
5431   cm->frame_to_show->color_range = cm->color_range;
5432   cm->frame_to_show->render_width = cm->render_width;
5433   cm->frame_to_show->render_height = cm->render_height;
5434
5435   // Pick the loop filter level for the frame.
5436   loopfilter_frame(cpi, cm);
5437
5438   if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
5439
5440   // build the bitstream
5441   vp9_pack_bitstream(cpi, dest, size);
5442
5443 #if CONFIG_REALTIME_ONLY
5444   (void)encode_frame_result;
5445   assert(encode_frame_result == NULL);
5446 #else  // CONFIG_REALTIME_ONLY
5447   if (encode_frame_result != NULL) {
5448     const int ref_frame_flags = get_ref_frame_flags(cpi);
5449     const RefCntBuffer *coded_frame_buf =
5450         get_ref_cnt_buffer(cm, cm->new_fb_idx);
5451     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
5452     get_ref_frame_bufs(cpi, ref_frame_bufs);
5453     // update_encode_frame_result() depends on twopass.gf_group.index and
5454     // cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and
5455     // cpi->alt_fb_idx are updated for current frame and have
5456     // not been updated for the next frame yet.
5457     // The update locations are as follows.
5458     // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
5459     // for the first frame in the gf_group and is updated for the next frame at
5460     // vp9_twopass_postencode_update().
5461     // 2) cpi->Source is updated at the beginning of vp9_get_compressed_data()
5462     // 3) cm->new_fb_idx is updated at the beginning of
5463     // vp9_get_compressed_data() by get_free_fb(cm).
5464     // 4) cpi->lst_fb_idx/gld_fb_idx/alt_fb_idx will be updated for the next
5465     // frame at vp9_update_reference_frames().
5466     // This function needs to be called before vp9_update_reference_frames().
5467     // TODO(angiebird): Improve the codebase to make the update of frame
5468     // dependent variables more robust.
5469     update_encode_frame_result(
5470         ref_frame_flags,
5471         cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
5472         cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),
5473         cpi->oxcf.input_bit_depth, cm->bit_depth, cpi->td.counts,
5474 #if CONFIG_RATE_CTRL
5475         cpi->partition_info, cpi->motion_vector_info,
5476 #endif  // CONFIG_RATE_CTRL
5477         encode_frame_result);
5478   }
5479 #endif  // CONFIG_REALTIME_ONLY
5480
5481   if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
5482       cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
5483     restore_coding_context(cpi);
5484     return;
5485   }
5486
5487   cpi->last_frame_dropped = 0;
5488   cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
5489   if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
5490     cpi->svc.num_encoded_top_layer++;
5491
5492   // Keep track of the frame buffer index updated/refreshed for the
5493   // current encoded TL0 superframe.
5494   if (cpi->svc.temporal_layer_id == 0) {
5495     if (cpi->refresh_last_frame)
5496       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
5497     else if (cpi->refresh_golden_frame)
5498       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
5499     else if (cpi->refresh_alt_ref_frame)
5500       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
5501   }
5502
5503   if (cm->seg.update_map) update_reference_segmentation_map(cpi);
5504
5505   if (frame_is_intra_only(cm) == 0) {
5506     release_scaled_references(cpi);
5507   }
5508   vp9_update_reference_frames(cpi);
5509
5510   if (!cm->show_existing_frame) {
5511     for (t = TX_4X4; t <= TX_32X32; ++t) {
5512       full_to_model_counts(cpi->td.counts->coef[t],
5513                            cpi->td.rd_counts.coef_counts[t]);
5514     }
5515
5516     if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
5517       if (!frame_is_intra_only(cm)) {
5518         vp9_adapt_mode_probs(cm);
5519         vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
5520       }
5521       vp9_adapt_coef_probs(cm);
5522     }
5523   }
5524
5525   cpi->ext_refresh_frame_flags_pending = 0;
5526
5527   if (cpi->refresh_golden_frame == 1)
5528     cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
5529   else
5530     cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
5531
5532   if (cpi->refresh_alt_ref_frame == 1)
5533     cpi->frame_flags |= FRAMEFLAGS_ALTREF;
5534   else
5535     cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
5536
5537   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
5538
5539   cm->last_frame_type = cm->frame_type;
5540
5541   vp9_rc_postencode_update(cpi, *size);
5542
5543   if (oxcf->pass == 0 && !frame_is_intra_only(cm) &&
5544       (!cpi->use_svc ||
5545        (cpi->use_svc &&
5546         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
5547         cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) {
5548     vp9_compute_frame_low_motion(cpi);
5549   }
5550
5551   *size = VPXMAX(1, *size);
5552
5553 #if 0
5554   output_frame_level_debug_stats(cpi);
5555 #endif
5556
5557   if (cm->frame_type == KEY_FRAME) {
5558     // Tell the caller that the frame was coded as a key frame
5559     *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
5560   } else {
5561     *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
5562   }
5563
5564   // Clear the one shot update flags for segmentation map and mode/ref loop
5565   // filter deltas.
5566   cm->seg.update_map = 0;
5567   cm->seg.update_data = 0;
5568   cm->lf.mode_ref_delta_update = 0;
5569
5570   // keep track of the last coded dimensions
5571   cm->last_width = cm->width;
5572   cm->last_height = cm->height;
5573
5574   // reset to normal state now that we are done.
5575   if (!cm->show_existing_frame) {
5576     cm->last_show_frame = cm->show_frame;
5577     cm->prev_frame = cm->cur_frame;
5578   }
5579
5580   if (cm->show_frame) {
5581     vp9_swap_mi_and_prev_mi(cm);
5582     if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
5583   }
5584   update_frame_indexes(cm, cm->show_frame);
5585
5586   if (cpi->use_svc) {
5587     cpi->svc
5588         .layer_context[cpi->svc.spatial_layer_id *
5589                            cpi->svc.number_temporal_layers +
5590                        cpi->svc.temporal_layer_id]
5591         .last_frame_type = cm->frame_type;
5592     // Reset layer_sync back to 0 for next frame.
5593     cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
5594   }
5595
5596   cpi->force_update_segmentation = 0;
5597
5598 #if !CONFIG_REALTIME_ONLY
5599   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5600     vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
5601 #endif
5602
5603   cpi->svc.previous_frame_is_intra_only = cm->intra_only;
5604   cpi->svc.set_intra_only_frame = 0;
5605 }
5606
5607 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5608                       unsigned int *frame_flags) {
5609   vp9_rc_get_svc_params(cpi);
5610   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5611                             /*encode_frame_result = */ NULL);
5612 }
5613
5614 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5615                         unsigned int *frame_flags) {
5616   if (cpi->oxcf.rc_mode == VPX_CBR) {
5617     vp9_rc_get_one_pass_cbr_params(cpi);
5618   } else {
5619     vp9_rc_get_one_pass_vbr_params(cpi);
5620   }
5621   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5622                             /*encode_frame_result = */ NULL);
5623 }
5624
5625 #if !CONFIG_REALTIME_ONLY
5626 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5627                         unsigned int *frame_flags,
5628                         ENCODE_FRAME_RESULT *encode_frame_result) {
5629   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
5630 #if CONFIG_MISMATCH_DEBUG
5631   mismatch_move_frame_idx_w();
5632 #endif
5633   encode_frame_to_data_rate(cpi, size, dest, frame_flags, encode_frame_result);
5634 }
5635 #endif  // !CONFIG_REALTIME_ONLY
5636
5637 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
5638                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
5639                           int64_t end_time) {
5640   VP9_COMMON *const cm = &cpi->common;
5641   struct vpx_usec_timer timer;
5642   int res = 0;
5643   const int subsampling_x = sd->subsampling_x;
5644   const int subsampling_y = sd->subsampling_y;
5645 #if CONFIG_VP9_HIGHBITDEPTH
5646   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
5647 #else
5648   const int use_highbitdepth = 0;
5649 #endif
5650
5651   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
5652 #if CONFIG_VP9_TEMPORAL_DENOISING
5653   setup_denoiser_buffer(cpi);
5654 #endif
5655
5656   alloc_raw_frame_buffers(cpi);
5657
5658   vpx_usec_timer_start(&timer);
5659
5660   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
5661                          use_highbitdepth, frame_flags))
5662     res = -1;
5663   vpx_usec_timer_mark(&timer);
5664   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
5665
5666   if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
5667       (subsampling_x != 1 || subsampling_y != 1)) {
5668     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5669                        "Non-4:2:0 color format requires profile 1 or 3");
5670     res = -1;
5671   }
5672   if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
5673       (subsampling_x == 1 && subsampling_y == 1)) {
5674     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5675                        "4:2:0 color format requires profile 0 or 2");
5676     res = -1;
5677   }
5678
5679   return res;
5680 }
5681
5682 static int frame_is_reference(const VP9_COMP *cpi) {
5683   const VP9_COMMON *cm = &cpi->common;
5684
5685   return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
5686          cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame ||
5687          cm->refresh_frame_context || cm->lf.mode_ref_delta_update ||
5688          cm->seg.update_map || cm->seg.update_data;
5689 }
5690
5691 static void adjust_frame_rate(VP9_COMP *cpi,
5692                               const struct lookahead_entry *source) {
5693   int64_t this_duration;
5694   int step = 0;
5695
5696   if (source->ts_start == cpi->first_time_stamp_ever) {
5697     this_duration = source->ts_end - source->ts_start;
5698     step = 1;
5699   } else {
5700     int64_t last_duration =
5701         cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
5702
5703     this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
5704
5705     // do a step update if the duration changes by 10%
5706     if (last_duration)
5707       step = (int)((this_duration - last_duration) * 10 / last_duration);
5708   }
5709
5710   if (this_duration) {
5711     if (step) {
5712       vp9_new_framerate(cpi, 10000000.0 / this_duration);
5713     } else {
5714       // Average this frame's rate into the last second's average
5715       // frame rate. If we haven't seen 1 second yet, then average
5716       // over the whole interval seen.
5717       const double interval = VPXMIN(
5718           (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
5719       double avg_duration = 10000000.0 / cpi->framerate;
5720       avg_duration *= (interval - avg_duration + this_duration);
5721       avg_duration /= interval;
5722
5723       vp9_new_framerate(cpi, 10000000.0 / avg_duration);
5724     }
5725   }
5726   cpi->last_time_stamp_seen = source->ts_start;
5727   cpi->last_end_time_stamp_seen = source->ts_end;
5728 }
5729
5730 // Returns 0 if this is not an alt ref else the offset of the source frame
5731 // used as the arf midpoint.
5732 static int get_arf_src_index(VP9_COMP *cpi) {
5733   RATE_CONTROL *const rc = &cpi->rc;
5734   int arf_src_index = 0;
5735   if (is_altref_enabled(cpi)) {
5736     if (cpi->oxcf.pass == 2) {
5737       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5738       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
5739         arf_src_index = gf_group->arf_src_offset[gf_group->index];
5740       }
5741     } else if (rc->source_alt_ref_pending) {
5742       arf_src_index = rc->frames_till_gf_update_due;
5743     }
5744   }
5745   return arf_src_index;
5746 }
5747
5748 static void check_src_altref(VP9_COMP *cpi,
5749                              const struct lookahead_entry *source) {
5750   RATE_CONTROL *const rc = &cpi->rc;
5751
5752   if (cpi->oxcf.pass == 2) {
5753     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5754     rc->is_src_frame_alt_ref =
5755         (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
5756   } else {
5757     rc->is_src_frame_alt_ref =
5758         cpi->alt_ref_source && (source == cpi->alt_ref_source);
5759   }
5760
5761   if (rc->is_src_frame_alt_ref) {
5762     // Current frame is an ARF overlay frame.
5763     cpi->alt_ref_source = NULL;
5764
5765     // Don't refresh the last buffer for an ARF overlay frame. It will
5766     // become the GF so preserve last as an alternative prediction option.
5767     cpi->refresh_last_frame = 0;
5768   }
5769 }
5770
5771 #if CONFIG_INTERNAL_STATS
5772 static void adjust_image_stat(double y, double u, double v, double all,
5773                               ImageStat *s) {
5774   s->stat[Y] += y;
5775   s->stat[U] += u;
5776   s->stat[V] += v;
5777   s->stat[ALL] += all;
5778   s->worst = VPXMIN(s->worst, all);
5779 }
5780 #endif  // CONFIG_INTERNAL_STATS
5781
5782 // Adjust the maximum allowable frame size for the target level.
5783 static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) {
5784   RATE_CONTROL *const rc = &cpi->rc;
5785   LevelConstraint *const ls = &cpi->level_constraint;
5786   VP9_COMMON *const cm = &cpi->common;
5787   const double max_cpb_size = ls->max_cpb_size;
5788   vpx_clear_system_state();
5789   rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size);
5790   if (frame_is_intra_only(cm)) {
5791     rc->max_frame_bandwidth =
5792         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5));
5793   } else if (arf_src_index > 0) {
5794     rc->max_frame_bandwidth =
5795         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4));
5796   } else {
5797     rc->max_frame_bandwidth =
5798         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2));
5799   }
5800 }
5801
5802 static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
5803   VP9_COMMON *const cm = &cpi->common;
5804   Vp9LevelInfo *const level_info = &cpi->level_info;
5805   Vp9LevelSpec *const level_spec = &level_info->level_spec;
5806   Vp9LevelStats *const level_stats = &level_info->level_stats;
5807   int i, idx;
5808   uint64_t luma_samples, dur_end;
5809   const uint32_t luma_pic_size = cm->width * cm->height;
5810   const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
5811   LevelConstraint *const level_constraint = &cpi->level_constraint;
5812   const int8_t level_index = level_constraint->level_index;
5813   double cpb_data_size;
5814
5815   vpx_clear_system_state();
5816
5817   // update level_stats
5818   level_stats->total_compressed_size += *size;
5819   if (cm->show_frame) {
5820     level_stats->total_uncompressed_size +=
5821         luma_pic_size +
5822         2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
5823     level_stats->time_encoded =
5824         (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
5825         (double)TICKS_PER_SEC;
5826   }
5827
5828   if (arf_src_index > 0) {
5829     if (!level_stats->seen_first_altref) {
5830       level_stats->seen_first_altref = 1;
5831     } else if (level_stats->frames_since_last_altref <
5832                level_spec->min_altref_distance) {
5833       level_spec->min_altref_distance = level_stats->frames_since_last_altref;
5834     }
5835     level_stats->frames_since_last_altref = 0;
5836   } else {
5837     ++level_stats->frames_since_last_altref;
5838   }
5839
5840   if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
5841     idx = (level_stats->frame_window_buffer.start +
5842            level_stats->frame_window_buffer.len++) %
5843           FRAME_WINDOW_SIZE;
5844   } else {
5845     idx = level_stats->frame_window_buffer.start;
5846     level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
5847   }
5848   level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
5849   level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
5850   level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
5851
5852   if (cm->frame_type == KEY_FRAME) {
5853     level_stats->ref_refresh_map = 0;
5854   } else {
5855     int count = 0;
5856     level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
5857     // Also need to consider the case where the encoder refers to a buffer
5858     // that has been implicitly refreshed after encoding a keyframe.
5859     if (!cm->intra_only) {
5860       level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
5861       level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
5862       level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
5863     }
5864     for (i = 0; i < REF_FRAMES; ++i) {
5865       count += (level_stats->ref_refresh_map >> i) & 1;
5866     }
5867     if (count > level_spec->max_ref_frame_buffers) {
5868       level_spec->max_ref_frame_buffers = count;
5869     }
5870   }
5871
5872   // update average_bitrate
5873   level_spec->average_bitrate = (double)level_stats->total_compressed_size /
5874                                 125.0 / level_stats->time_encoded;
5875
5876   // update max_luma_sample_rate
5877   luma_samples = 0;
5878   for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
5879     idx = (level_stats->frame_window_buffer.start +
5880            level_stats->frame_window_buffer.len - 1 - i) %
5881           FRAME_WINDOW_SIZE;
5882     if (i == 0) {
5883       dur_end = level_stats->frame_window_buffer.buf[idx].ts;
5884     }
5885     if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
5886         TICKS_PER_SEC) {
5887       break;
5888     }
5889     luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
5890   }
5891   if (luma_samples > level_spec->max_luma_sample_rate) {
5892     level_spec->max_luma_sample_rate = luma_samples;
5893   }
5894
5895   // update max_cpb_size
5896   cpb_data_size = 0;
5897   for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
5898     if (i >= level_stats->frame_window_buffer.len) break;
5899     idx = (level_stats->frame_window_buffer.start +
5900            level_stats->frame_window_buffer.len - 1 - i) %
5901           FRAME_WINDOW_SIZE;
5902     cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5903   }
5904   cpb_data_size = cpb_data_size / 125.0;
5905   if (cpb_data_size > level_spec->max_cpb_size) {
5906     level_spec->max_cpb_size = cpb_data_size;
5907   }
5908
5909   // update max_luma_picture_size
5910   if (luma_pic_size > level_spec->max_luma_picture_size) {
5911     level_spec->max_luma_picture_size = luma_pic_size;
5912   }
5913
5914   // update max_luma_picture_breadth
5915   if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
5916     level_spec->max_luma_picture_breadth = luma_pic_breadth;
5917   }
5918
5919   // update compression_ratio
5920   level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
5921                                   cm->bit_depth /
5922                                   level_stats->total_compressed_size / 8.0;
5923
5924   // update max_col_tiles
5925   if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
5926     level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
5927   }
5928
5929   if (level_index >= 0 && level_constraint->fail_flag == 0) {
5930     if (level_spec->max_luma_picture_size >
5931         vp9_level_defs[level_index].max_luma_picture_size) {
5932       level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
5933       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5934                          "Failed to encode to the target level %d. %s",
5935                          vp9_level_defs[level_index].level,
5936                          level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
5937     }
5938
5939     if (level_spec->max_luma_picture_breadth >
5940         vp9_level_defs[level_index].max_luma_picture_breadth) {
5941       level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
5942       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5943                          "Failed to encode to the target level %d. %s",
5944                          vp9_level_defs[level_index].level,
5945                          level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
5946     }
5947
5948     if ((double)level_spec->max_luma_sample_rate >
5949         (double)vp9_level_defs[level_index].max_luma_sample_rate *
5950             (1 + SAMPLE_RATE_GRACE_P)) {
5951       level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
5952       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5953                          "Failed to encode to the target level %d. %s",
5954                          vp9_level_defs[level_index].level,
5955                          level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
5956     }
5957
5958     if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
5959       level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
5960       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5961                          "Failed to encode to the target level %d. %s",
5962                          vp9_level_defs[level_index].level,
5963                          level_fail_messages[TOO_MANY_COLUMN_TILE]);
5964     }
5965
5966     if (level_spec->min_altref_distance <
5967         vp9_level_defs[level_index].min_altref_distance) {
5968       level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
5969       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5970                          "Failed to encode to the target level %d. %s",
5971                          vp9_level_defs[level_index].level,
5972                          level_fail_messages[ALTREF_DIST_TOO_SMALL]);
5973     }
5974
5975     if (level_spec->max_ref_frame_buffers >
5976         vp9_level_defs[level_index].max_ref_frame_buffers) {
5977       level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
5978       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5979                          "Failed to encode to the target level %d. %s",
5980                          vp9_level_defs[level_index].level,
5981                          level_fail_messages[TOO_MANY_REF_BUFFER]);
5982     }
5983
5984     if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
5985       level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
5986       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5987                          "Failed to encode to the target level %d. %s",
5988                          vp9_level_defs[level_index].level,
5989                          level_fail_messages[CPB_TOO_LARGE]);
5990     }
5991
5992     // Set an upper bound for the next frame size. It will be used in
5993     // level_rc_framerate() before encoding the next frame.
5994     cpb_data_size = 0;
5995     for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
5996       if (i >= level_stats->frame_window_buffer.len) break;
5997       idx = (level_stats->frame_window_buffer.start +
5998              level_stats->frame_window_buffer.len - 1 - i) %
5999             FRAME_WINDOW_SIZE;
6000       cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
6001     }
6002     cpb_data_size = cpb_data_size / 125.0;
6003     level_constraint->max_frame_size =
6004         (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
6005               1000.0);
6006     if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
6007       level_constraint->max_frame_size >>= 1;
6008   }
6009 }
6010
// One entry of the picture list built by init_gop_frames().
typedef struct GF_PICTURE {
  // Source (or reference) frame buffer for this entry.
  YV12_BUFFER_CONFIG *frame;
  // Indices of this entry's references within the same picture list, as
  // filled in by init_gop_frames(): [0] golden, [1] last, [2] alt-ref.
  // -1 marks a reference that is not available.
  int ref_frame[3];
  // Update type assigned to this entry.
  FRAME_UPDATE_TYPE update_type;
} GF_PICTURE;
6016
// Builds the list of pictures (gf_picture[]) for the current GF group and
// sets up the reconstruction buffers in cpi->tpl_recon_frames[].
//
// cpi:              encoder instance (lookahead, buffer pool, tpl_stats).
// gf_picture:       output array; entry 0 is the golden reference, entry 1
//                   is the current source (base layer ARF), later entries
//                   come from the lookahead queue.
// gf_group:         GF group layout (update types, gop indices, group size).
// tpl_group_frames: output; number of gf_picture[] entries that were filled.
//
// Each entry's ref_frame[] holds indices into gf_picture[] for the golden,
// last and alt-ref references in effect at that point (-1 if none).
static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                            const GF_GROUP *gf_group, int *tpl_group_frames) {
  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  // Current gf_picture[] indices of the golden / alt-ref / last references.
  int gld_index = -1;
  int alt_index = -1;
  int lst_index = -1;
  // Stack of alt-ref indices that are shadowed by a newer ARF.
  int arf_index_stack[MAX_ARF_LAYERS];
  int arf_stack_size = 0;
  int extend_frame_count = 0;
  // base_qindex applied to the frames appended beyond the GF group.
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));
  stack_init(arf_index_stack, MAX_ARF_LAYERS);

  // TODO(jingning): To be used later for gf frame type parsing.
  // NOTE(review): gf_group is in fact read further down in this function, so
  // this cast (and the TODO above) looks stale.
  (void)gf_group;

  // Claim currently unreferenced frame buffers (ref_count == 0) for the
  // reconstruction frames, (re)allocating each at the current frame size.
  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  // NOTE(review): the availability of REFS_PER_FRAME + 1 buffers is only
  // checked by assert(); with asserts compiled out a -1 entry would be used
  // as an index below.
  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  // Initialize Golden reference frame.
  gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
  gf_picture[0].update_type = gf_group->update_type[0];
  gld_index = 0;
  ++*tpl_group_frames;

  // Initialize base layer ARF frame
  gf_picture[1].frame = cpi->Source;
  gf_picture[1].ref_frame[0] = gld_index;
  gf_picture[1].ref_frame[1] = lst_index;
  gf_picture[1].ref_frame[2] = alt_index;
  gf_picture[1].update_type = gf_group->update_type[1];
  alt_index = 1;
  ++*tpl_group_frames;

  // Initialize P frames
  for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    // Stop when the lookahead has no frame at this position.
    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    // Update the reference indices seen by the following entries according
    // to what this frame refreshes.
    switch (gf_group->update_type[frame_idx]) {
      case ARF_UPDATE:
        // New ARF: remember the current alt-ref and replace it.
        stack_push(arf_index_stack, alt_index, arf_stack_size);
        ++arf_stack_size;
        alt_index = frame_idx;
        break;
      case LF_UPDATE: lst_index = frame_idx; break;
      case OVERLAY_UPDATE:
        // Overlay becomes the golden frame; restore the previous alt-ref.
        gld_index = frame_idx;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      case USE_BUF_FRAME:
        // The current alt-ref becomes the last frame; restore the previous
        // alt-ref.
        lst_index = alt_index;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      default: break;
    }

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) break;
  }

  // Frames past the GF group have no alt-ref; step to the next list slot
  // and the next lookahead position.
  alt_index = -1;
  ++frame_idx;
  ++frame_gop_offset;

  // Extend two frames outside the current gf group.
  for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }
}
6149
6150 static void init_tpl_stats(VP9_COMP *cpi) {
6151   int frame_idx;
6152   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
6153     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6154     memset(tpl_frame->tpl_stats_ptr, 0,
6155            tpl_frame->height * tpl_frame->width *
6156                sizeof(*tpl_frame->tpl_stats_ptr));
6157     tpl_frame->is_valid = 0;
6158   }
6159 }
6160
6161 #if CONFIG_NON_GREEDY_MV
6162 static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
6163                                          MotionField *motion_field,
6164                                          int frame_idx, uint8_t *cur_frame_buf,
6165                                          uint8_t *ref_frame_buf, int stride,
6166                                          BLOCK_SIZE bsize, int mi_row,
6167                                          int mi_col, MV *mv) {
6168   MACROBLOCK *const x = &td->mb;
6169   MACROBLOCKD *const xd = &x->e_mbd;
6170   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6171   int step_param;
6172   uint32_t bestsme = UINT_MAX;
6173   const MvLimits tmp_mv_limits = x->mv_limits;
6174   // lambda is used to adjust the importance of motion vector consistency.
6175   // TODO(angiebird): Figure out lambda's proper value.
6176   const int lambda = cpi->tpl_stats[frame_idx].lambda;
6177   int_mv nb_full_mvs[NB_MVS_NUM];
6178   int nb_full_mv_num;
6179
6180   MV best_ref_mv1 = { 0, 0 };
6181   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6182
6183   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6184   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6185
6186   // Setup frame pointers
6187   x->plane[0].src.buf = cur_frame_buf;
6188   x->plane[0].src.stride = stride;
6189   xd->plane[0].pre[0].buf = ref_frame_buf;
6190   xd->plane[0].pre[0].stride = stride;
6191
6192   step_param = mv_sf->reduce_first_step_size;
6193   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6194
6195   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6196
6197   nb_full_mv_num =
6198       vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
6199   vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
6200                              lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
6201
6202   /* restore UMV window */
6203   x->mv_limits = tmp_mv_limits;
6204
6205   return bestsme;
6206 }
6207
6208 static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
6209                                         uint8_t *cur_frame_buf,
6210                                         uint8_t *ref_frame_buf, int stride,
6211                                         BLOCK_SIZE bsize, MV *mv) {
6212   MACROBLOCK *const x = &td->mb;
6213   MACROBLOCKD *const xd = &x->e_mbd;
6214   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6215   uint32_t bestsme = UINT_MAX;
6216   uint32_t distortion;
6217   uint32_t sse;
6218   int cost_list[5];
6219
6220   MV best_ref_mv1 = { 0, 0 };
6221
6222   // Setup frame pointers
6223   x->plane[0].src.buf = cur_frame_buf;
6224   x->plane[0].src.stride = stride;
6225   xd->plane[0].pre[0].buf = ref_frame_buf;
6226   xd->plane[0].pre[0].stride = stride;
6227
6228   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6229   // Ignore mv costing by sending NULL pointer instead of cost array
6230   bestsme = cpi->find_fractional_mv_step(
6231       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6232       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6233       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6234       USE_2_TAPS);
6235
6236   return bestsme;
6237 }
6238
6239 #else  // CONFIG_NON_GREEDY_MV
6240 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
6241                                               uint8_t *cur_frame_buf,
6242                                               uint8_t *ref_frame_buf,
6243                                               int stride, BLOCK_SIZE bsize,
6244                                               MV *mv) {
6245   MACROBLOCK *const x = &td->mb;
6246   MACROBLOCKD *const xd = &x->e_mbd;
6247   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6248   const SEARCH_METHODS search_method = NSTEP;
6249   int step_param;
6250   int sadpb = x->sadperbit16;
6251   uint32_t bestsme = UINT_MAX;
6252   uint32_t distortion;
6253   uint32_t sse;
6254   int cost_list[5];
6255   const MvLimits tmp_mv_limits = x->mv_limits;
6256
6257   MV best_ref_mv1 = { 0, 0 };
6258   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6259
6260   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6261   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6262
6263   // Setup frame pointers
6264   x->plane[0].src.buf = cur_frame_buf;
6265   x->plane[0].src.stride = stride;
6266   xd->plane[0].pre[0].buf = ref_frame_buf;
6267   xd->plane[0].pre[0].stride = stride;
6268
6269   step_param = mv_sf->reduce_first_step_size;
6270   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6271
6272   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6273
6274   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
6275                         search_method, sadpb, cond_cost_list(cpi, cost_list),
6276                         &best_ref_mv1, mv, 0, 0);
6277
6278   /* restore UMV window */
6279   x->mv_limits = tmp_mv_limits;
6280
6281   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6282   // Ignore mv costing by sending NULL pointer instead of cost array
6283   bestsme = cpi->find_fractional_mv_step(
6284       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6285       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6286       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6287       USE_2_TAPS);
6288
6289   return bestsme;
6290 }
6291 #endif
6292
6293 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
6294                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
6295   int width = 0, height = 0;
6296   int bw = 4 << b_width_log2_lookup[bsize];
6297   int bh = 4 << b_height_log2_lookup[bsize];
6298
6299   switch (block) {
6300     case 0:
6301       width = grid_pos_col + bw - ref_pos_col;
6302       height = grid_pos_row + bh - ref_pos_row;
6303       break;
6304     case 1:
6305       width = ref_pos_col + bw - grid_pos_col;
6306       height = grid_pos_row + bh - ref_pos_row;
6307       break;
6308     case 2:
6309       width = grid_pos_col + bw - ref_pos_col;
6310       height = ref_pos_row + bh - grid_pos_row;
6311       break;
6312     case 3:
6313       width = ref_pos_col + bw - grid_pos_col;
6314       height = ref_pos_row + bh - grid_pos_row;
6315       break;
6316     default: assert(0);
6317   }
6318
6319   return width * height;
6320 }
6321
// Returns ref_pos / bsize_pix rounded towards negative infinity (C's `/`
// rounds towards zero, which is wrong for negative positions).
//
// ref_pos:   position in pixels, may be negative.
// bsize_pix: block size in pixels; callers pass a positive value.
static int round_floor(int ref_pos, int bsize_pix) {
  if (ref_pos >= 0) return ref_pos / bsize_pix;

  // For negative ref_pos, -(ref_pos + 1) lies in [0, INT_MAX], so it cannot
  // overflow even when ref_pos == INT_MIN (negating ref_pos directly would).
  // The result equals -(1 + (-ref_pos - 1) / bsize_pix).
  return -1 - (-(ref_pos + 1)) / bsize_pix;
}
6331
6332 static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
6333                             BLOCK_SIZE bsize, int stride) {
6334   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6335   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6336   const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
6337   int idx, idy;
6338
6339   for (idy = 0; idy < mi_height; ++idy) {
6340     for (idx = 0; idx < mi_width; ++idx) {
6341       TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
6342       const int64_t mc_flow = tpl_ptr->mc_flow;
6343       const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
6344       *tpl_ptr = *src_stats;
6345       tpl_ptr->mc_flow = mc_flow;
6346       tpl_ptr->mc_ref_cost = mc_ref_cost;
6347       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
6348     }
6349   }
6350 }
6351
6352 static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6353                                int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6354   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
6355   TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
6356   MV mv = tpl_stats->mv.as_mv;
6357   int mv_row = mv.row >> 3;
6358   int mv_col = mv.col >> 3;
6359
6360   int ref_pos_row = mi_row * MI_SIZE + mv_row;
6361   int ref_pos_col = mi_col * MI_SIZE + mv_col;
6362
6363   const int bw = 4 << b_width_log2_lookup[bsize];
6364   const int bh = 4 << b_height_log2_lookup[bsize];
6365   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6366   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6367   const int pix_num = bw * bh;
6368
6369   // top-left on grid block location in pixel
6370   int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
6371   int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
6372   int block;
6373
6374   for (block = 0; block < 4; ++block) {
6375     int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
6376     int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
6377
6378     if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
6379         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
6380       int overlap_area = get_overlap_area(
6381           grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
6382       int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
6383       int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
6384
6385       int64_t mc_flow = tpl_stats->mc_dep_cost -
6386                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
6387                             tpl_stats->intra_cost;
6388
6389       int idx, idy;
6390
6391       for (idy = 0; idy < mi_height; ++idy) {
6392         for (idx = 0; idx < mi_width; ++idx) {
6393           TplDepStats *des_stats =
6394               &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
6395                          (ref_mi_col + idx)];
6396
6397           des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
6398           des_stats->mc_ref_cost +=
6399               ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
6400               pix_num;
6401           assert(overlap_area >= 0);
6402         }
6403       }
6404     }
6405   }
6406 }
6407
6408 static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6409                              int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6410   int idx, idy;
6411   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6412   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6413
6414   for (idy = 0; idy < mi_height; ++idy) {
6415     for (idx = 0; idx < mi_width; ++idx) {
6416       TplDepStats *tpl_ptr =
6417           &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
6418       tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
6419                          BLOCK_8X8);
6420     }
6421   }
6422 }
6423
6424 static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
6425                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
6426                                TX_SIZE tx_size, int64_t *recon_error,
6427                                int64_t *sse) {
6428   MACROBLOCKD *const xd = &x->e_mbd;
6429   const struct macroblock_plane *const p = &x->plane[plane];
6430   const struct macroblockd_plane *const pd = &xd->plane[plane];
6431   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
6432   uint16_t eob;
6433   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
6434   const int shift = tx_size == TX_32X32 ? 0 : 2;
6435
6436 #if CONFIG_VP9_HIGHBITDEPTH
6437   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6438     vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6439                                  p->quant_fp, qcoeff, dqcoeff, pd->dequant,
6440                                  &eob, scan_order->scan, scan_order->iscan);
6441   } else {
6442     vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6443                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
6444                           scan_order->scan, scan_order->iscan);
6445   }
6446 #else
6447   vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
6448                         qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
6449                         scan_order->iscan);
6450 #endif  // CONFIG_VP9_HIGHBITDEPTH
6451
6452   *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
6453   *recon_error = VPXMAX(*recon_error, 1);
6454
6455   *sse = (*sse) >> shift;
6456   *sse = VPXMAX(*sse, 1);
6457 }
6458
6459 #if CONFIG_VP9_HIGHBITDEPTH
6460 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6461                          TX_SIZE tx_size) {
6462   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
6463   switch (tx_size) {
6464     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
6465     case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
6466     case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
6467     default: assert(0);
6468   }
6469 }
6470 #endif  // CONFIG_VP9_HIGHBITDEPTH
6471
6472 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6473                   TX_SIZE tx_size) {
6474   switch (tx_size) {
6475     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
6476     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
6477     case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
6478     default: assert(0);
6479   }
6480 }
6481
6482 static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
6483                           int mi_col) {
6484   x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6485   x->mv_limits.row_max =
6486       (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
6487   x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6488   x->mv_limits.col_max =
6489       ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
6490 }
6491
// Estimates the best intra and inter prediction cost for the block of size
// bsize at (mi_row, mi_col) and stores the result in the block's TplDepStats
// entry of tpl_frame.
//
// Cost is the SATD of the Hadamard-transformed prediction residual. Intra
// tries every mode from DC_PRED to TM_PRED. Inter tries every non-NULL entry
// of ref_frame[]; the motion vector comes from the stored motion field when
// CONFIG_NON_GREEDY_MV is set, otherwise from
// motion_compensated_prediction().
//
// src_diff, coeff, qcoeff, dqcoeff and predictor are caller-provided scratch
// buffers. recon_error and sse are written by get_quantize_error() each time
// a new best inter candidate is found.
//
// NOTE(review): if every ref_frame[] entry is NULL, best_rf_idx stays -1 and
// is still used to index gf_picture[frame_idx].ref_frame[] at the end, and
// recon_error / sse are never written. Confirm callers always supply at
// least one reference.
static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                            struct scale_factors *sf, GF_PICTURE *gf_picture,
                            int frame_idx, TplDepFrame *tpl_frame,
                            int16_t *src_diff, tran_low_t *coeff,
                            tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
                            YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
                            int64_t *recon_error, int64_t *sse) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int pix_num = bw * bh;
  int best_rf_idx = -1;
  int_mv best_mv;
  int64_t best_inter_cost = INT64_MAX;
  int64_t inter_cost;
  int rf_idx;
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];

  int64_t best_intra_cost = INT64_MAX;
  int64_t intra_cost;
  PREDICTION_MODE mode;
  // Offset of the block's top-left luma sample inside the frame buffer.
  int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  // Only the addresses of these two are stored (in xd->above_mi / left_mi);
  // their contents are not initialized in this function.
  MODE_INFO mi_above, mi_left;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  TplDepStats *tpl_stats =
      &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];

  // Distances from the block to the frame edges, scaled by 8.
  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
  xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
  xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
  // Neighbours exist only when the block is not on the top / left frame edge.
  xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
  xd->left_mi = (mi_col > 0) ? &mi_left : NULL;

  // Intra prediction search
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    uint8_t *src, *dst;
    int src_stride, dst_stride;

    src = xd->cur_buf->y_buffer + mb_y_offset;
    src_stride = xd->cur_buf->y_stride;

    // The prediction is written into the scratch buffer with row pitch bw.
    dst = &predictor[0];
    dst_stride = bw;

    xd->mi[0]->sb_type = bsize;
    xd->mi[0]->ref_frame[0] = INTRA_FRAME;

    vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
                            src_stride, dst, dst_stride, 0, 0, 0);

    // Residual -> Hadamard transform -> SATD gives the cost of this mode.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                                dst_stride, xd->bd);
      highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                         dst_stride);
      wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_satd(coeff, pix_num);
    }
#else
    vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
    wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    intra_cost = vpx_satd(coeff, pix_num);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
  }

  // Motion compensated prediction
  best_mv.as_int = 0;

  set_mv_limits(cm, x, mi_row, mi_col);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    int_mv mv;
#if CONFIG_NON_GREEDY_MV
    MotionField *motion_field;
#endif
    // Reference slots without a frame are skipped.
    if (ref_frame[rf_idx] == NULL) continue;

#if CONFIG_NON_GREEDY_MV
    // The motion vector is read back from the stored motion field; td is not
    // needed in this configuration.
    (void)td;
    motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
#else
    // The search's return value is ignored; only the motion vector is used.
    motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
                                  ref_frame[rf_idx]->y_buffer + mb_y_offset,
                                  xd->cur_buf->y_stride, bsize, &mv.as_mv);
#endif

    // Build the inter predictor for this mv and measure its SATD cost the
    // same way as for intra.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
          ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
          &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
          mi_row * MI_SIZE, xd->bd);
      vpx_highbd_subtract_block(
          bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
          xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
      highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vp9_build_inter_predictor(
          ref_frame[rf_idx]->y_buffer + mb_y_offset,
          ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
          0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
      vpx_subtract_block(bh, bw, src_diff, bw,
                         xd->cur_buf->y_buffer + mb_y_offset,
                         xd->cur_buf->y_stride, &predictor[0], bw);
      wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_satd(coeff, pix_num);
    }
#else
    vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
                              ref_frame[rf_idx]->y_stride, &predictor[0], bw,
                              &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);
    vpx_subtract_block(bh, bw, src_diff, bw,
                       xd->cur_buf->y_buffer + mb_y_offset,
                       xd->cur_buf->y_stride, &predictor[0], bw);
    wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    inter_cost = vpx_satd(coeff, pix_num);
#endif

    // Keep the cheapest reference; coeff still holds its transform, so the
    // quantization error is taken for the current best candidate.
    if (inter_cost < best_inter_cost) {
      best_rf_idx = rf_idx;
      best_inter_cost = inter_cost;
      best_mv.as_int = mv.as_int;
      get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
                         sse);
    }
  }
  // Clamp intra cost to at least 1 and cap inter cost at the intra cost, then
  // store both scaled by TPL_DEP_COST_SCALE_LOG2 and averaged per 8x8 unit.
  best_intra_cost = VPXMAX(best_intra_cost, 1);
  best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
  tpl_stats->inter_cost = VPXMAX(
      1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  tpl_stats->intra_cost = VPXMAX(
      1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
  tpl_stats->mv.as_int = best_mv.as_int;
}
6643
6644 #if CONFIG_NON_GREEDY_MV
6645 static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
6646                                   int frame_idx, int rf_idx, int mi_row,
6647                                   int mi_col, struct buf_2d *src,
6648                                   struct buf_2d *pre) {
6649   const int mb_y_offset =
6650       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6651   YV12_BUFFER_CONFIG *ref_frame = NULL;
6652   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6653   if (ref_frame_idx != -1) {
6654     ref_frame = gf_picture[ref_frame_idx].frame;
6655     src->buf = xd->cur_buf->y_buffer + mb_y_offset;
6656     src->stride = xd->cur_buf->y_stride;
6657     pre->buf = ref_frame->y_buffer + mb_y_offset;
6658     pre->stride = ref_frame->y_stride;
6659     assert(src->stride == pre->stride);
6660     return 1;
6661   } else {
6662     printf("invalid ref_frame_idx");
6663     assert(ref_frame_idx != -1);
6664     return 0;
6665   }
6666 }
6667
// Number of diagonal lines of blocks (starting at the current block) that
// predict_mv_mode() evaluates when deciding whether to use a new mv.
#define kMvPreCheckLines 5
// Number of blocks on those diagonals:
// kMvPreCheckLines * (kMvPreCheckLines + 1) / 2.
#define kMvPreCheckSize 15

// Number of neighbour positions inspected by find_ref_mv().
#define MV_REF_POS_NUM 3
// Neighbour offsets in units of the block size, scanned in this order by
// find_ref_mv(). Offsets are never positive, so only blocks above and/or to
// the left are referenced. Presumably each entry is { row, col } - confirm
// against the POSITION declaration.
POSITION mv_ref_pos[MV_REF_POS_NUM] = {
  { -1, 0 },
  { 0, -1 },
  { -1, -1 },
};
6677
6678 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
6679                              int mi_col) {
6680   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
6681 }
6682
6683 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
6684                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
6685   int i;
6686   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6687   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6688   int_mv nearest_mv, near_mv, invalid_mv;
6689   nearest_mv.as_int = INVALID_MV;
6690   near_mv.as_int = INVALID_MV;
6691   invalid_mv.as_int = INVALID_MV;
6692   for (i = 0; i < MV_REF_POS_NUM; ++i) {
6693     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
6694     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
6695     assert(mv_ref_pos[i].row <= 0);
6696     assert(mv_ref_pos[i].col <= 0);
6697     if (nb_row >= 0 && nb_col >= 0) {
6698       if (nearest_mv.as_int == INVALID_MV) {
6699         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6700       } else {
6701         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6702         if (mv.as_int == nearest_mv.as_int) {
6703           continue;
6704         } else {
6705           near_mv = mv;
6706           break;
6707         }
6708       }
6709     }
6710   }
6711   if (nearest_mv.as_int == INVALID_MV) {
6712     nearest_mv.as_mv.row = 0;
6713     nearest_mv.as_mv.col = 0;
6714   }
6715   if (near_mv.as_int == INVALID_MV) {
6716     near_mv.as_mv.row = 0;
6717     near_mv.as_mv.col = 0;
6718   }
6719   if (mv_mode == NEAREST_MV_MODE) {
6720     return nearest_mv;
6721   }
6722   if (mv_mode == NEAR_MV_MODE) {
6723     return near_mv;
6724   }
6725   assert(0);
6726   return invalid_mv;
6727 }
6728
6729 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
6730                                   MotionField *motion_field,
6731                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
6732                                   int mi_row, int mi_col) {
6733   int_mv mv;
6734   switch (mv_mode) {
6735     case ZERO_MV_MODE:
6736       mv.as_mv.row = 0;
6737       mv.as_mv.col = 0;
6738       break;
6739     case NEW_MV_MODE:
6740       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6741       break;
6742     case NEAREST_MV_MODE:
6743       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6744       break;
6745     case NEAR_MV_MODE:
6746       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6747       break;
6748     default:
6749       mv.as_int = INVALID_MV;
6750       assert(0);
6751       break;
6752   }
6753   return mv;
6754 }
6755
6756 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
6757                           GF_PICTURE *gf_picture, MotionField *motion_field,
6758                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6759                           BLOCK_SIZE bsize, int mi_row, int mi_col,
6760                           int_mv *mv) {
6761   uint32_t sse;
6762   struct buf_2d src;
6763   struct buf_2d pre;
6764   MV full_mv;
6765   *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
6766                             mi_row, mi_col);
6767   full_mv = get_full_mv(&mv->as_mv);
6768   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
6769                              &src, &pre)) {
6770     // TODO(angiebird): Consider subpixel when computing the sse.
6771     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
6772                           pre.stride, &sse);
6773     return (double)(sse << VP9_DIST_SCALE_LOG2);
6774   } else {
6775     assert(0);
6776     return 0;
6777   }
6778 }
6779
6780 static int get_mv_mode_cost(int mv_mode) {
6781   // TODO(angiebird): The probabilities are roughly inferred from
6782   // default_inter_mode_probs. Check if there is a better way to set the
6783   // probabilities.
6784   const int zero_mv_prob = 16;
6785   const int new_mv_prob = 24 * 1;
6786   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
6787   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
6788   switch (mv_mode) {
6789     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
6790     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
6791     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6792     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6793     default: assert(0); return -1;
6794   }
6795 }
6796
6797 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
6798   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
6799                         log2(1 + abs(new_mv->col - ref_mv->col));
6800   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
6801   return mv_diff_cost;
6802 }
6803 static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
6804                           TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
6805                           int mi_col) {
6806   double mv_cost = get_mv_mode_cost(mv_mode);
6807   if (mv_mode == NEW_MV_MODE) {
6808     MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
6809                                     bsize, mi_row, mi_col)
6810                     .as_mv;
6811     MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
6812                                         tpl_frame, bsize, mi_row, mi_col)
6813                         .as_mv;
6814     MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
6815                                      bsize, mi_row, mi_col)
6816                      .as_mv;
6817     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
6818     double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
6819     mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
6820   }
6821   return mv_cost;
6822 }
6823
6824 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
6825                            GF_PICTURE *gf_picture, MotionField *motion_field,
6826                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6827                            BLOCK_SIZE bsize, int mi_row, int mi_col,
6828                            int_mv *mv) {
6829   MACROBLOCKD *xd = &x->e_mbd;
6830   double mv_dist =
6831       get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
6832                   tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
6833   double mv_cost =
6834       get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
6835   double mult = 180;
6836
6837   return mv_cost + mult * log2f(1 + mv_dist);
6838 }
6839
6840 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6841                                  GF_PICTURE *gf_picture,
6842                                  MotionField *motion_field, int frame_idx,
6843                                  TplDepFrame *tpl_frame, int rf_idx,
6844                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
6845                                  double *rd, int_mv *mv) {
6846   int best_mv_mode = ZERO_MV_MODE;
6847   int update = 0;
6848   int mv_mode;
6849   *rd = 0;
6850   for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
6851     double this_rd;
6852     int_mv this_mv;
6853     if (mv_mode == NEW_MV_MODE) {
6854       continue;
6855     }
6856     this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
6857                            tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
6858     if (update == 0) {
6859       *rd = this_rd;
6860       *mv = this_mv;
6861       best_mv_mode = mv_mode;
6862       update = 1;
6863     } else {
6864       if (this_rd < *rd) {
6865         *rd = this_rd;
6866         *mv = this_mv;
6867         best_mv_mode = mv_mode;
6868       }
6869     }
6870   }
6871   return best_mv_mode;
6872 }
6873
// Decides whether the block at (mi_row, mi_col) should use NEW_MV_MODE or one
// of the other mv modes, for reference rf_idx.
//
// The decision looks ahead over kMvPreCheckLines diagonals of blocks below
// and to the right of the current one. Two passes are run:
//   1. "no new mv": every block on the diagonals (including the current one)
//      gets its best non-new mode;
//   2. "new mv": the current block is forced to NEW_MV_MODE and the remaining
//      blocks on the diagonals are re-evaluated.
// The pass with the smaller summed score wins. The function writes
// tpl_frame->mv_mode_arr[rf_idx], cpi->select_mv_arr and
// tpl_frame->rd_diff_arr[rf_idx].
static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                            GF_PICTURE *gf_picture, MotionField *motion_field,
                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                            BLOCK_SIZE bsize, int mi_row, int mi_col) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  // Snapshot of the pass-1 modes, in scan order, so they can be restored if
  // pass 1 turns out to be the better choice.
  int tmp_mv_mode_arr[kMvPreCheckSize];
  int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
  double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
  int_mv *select_mv_arr = cpi->select_mv_arr;
  // Snapshot of the pass-1 selected mvs, parallel to tmp_mv_mode_arr.
  int_mv tmp_select_mv_arr[kMvPreCheckSize];
  int stride = tpl_frame->stride;
  // Summed scores over all checked blocks for pass 2 / pass 1.
  double new_mv_rd = 0;
  double no_new_mv_rd = 0;
  // Scores of the current block alone in pass 2 / pass 1.
  double this_new_mv_rd = 0;
  double this_no_new_mv_rd = 0;
  int idx;
  int tmp_idx;
  assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);

  // no new mv
  // diagonal scan order
  tmp_idx = 0;
  for (idx = 0; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      // Blocks past the bottom / right frame edge are skipped.
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        if (r == 0 && c == 0) {
          this_no_new_mv_rd = this_rd;
        }
        no_new_mv_rd += this_rd;
        tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
        tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
        ++tmp_idx;
      }
    }
  }

  // new mv
  mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
  this_new_mv_rd = eval_mv_mode(
      NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
      rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
  new_mv_rd = this_new_mv_rd;
  // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
  // beforehand.
  for (idx = 1; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        new_mv_rd += this_rd;
      }
    }
  }

  // update best_mv_mode
  tmp_idx = 0;
  if (no_new_mv_rd < new_mv_rd) {
    // Pass 1 is cheaper: put back the modes and mvs recorded during pass 1,
    // walking the blocks in the same scan order.
    for (idx = 0; idx < kMvPreCheckLines; ++idx) {
      int r;
      for (r = 0; r <= idx; ++r) {
        int c = idx - r;
        int nb_row = mi_row + r * mi_height;
        int nb_col = mi_col + c * mi_width;
        if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
          mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
          select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
          ++tmp_idx;
        }
      }
    }
    rd_diff_arr[mi_row * stride + mi_col] = 0;
  } else {
    // Pass 2 is kept: record how much the neighbours (current block
    // excluded) gain from the current block using a new mv.
    rd_diff_arr[mi_row * stride + mi_col] =
        (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
  }
}
6967
6968 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
6969                                 GF_PICTURE *gf_picture,
6970                                 MotionField *motion_field, int frame_idx,
6971                                 TplDepFrame *tpl_frame, int rf_idx,
6972                                 BLOCK_SIZE bsize) {
6973   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6974   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6975   const int unit_rows = tpl_frame->mi_rows / mi_height;
6976   const int unit_cols = tpl_frame->mi_cols / mi_width;
6977   const int max_diagonal_lines = unit_rows + unit_cols - 1;
6978   int idx;
6979   for (idx = 0; idx < max_diagonal_lines; ++idx) {
6980     int r;
6981     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
6982          ++r) {
6983       int c = idx - r;
6984       int mi_row = r * mi_height;
6985       int mi_col = c * mi_width;
6986       assert(c >= 0 && c < unit_cols);
6987       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
6988       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
6989       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6990                       rf_idx, bsize, mi_row, mi_col);
6991     }
6992   }
6993 }
6994
6995 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
6996                              MotionField *motion_field, int frame_idx,
6997                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
6998                              int mi_row, int mi_col) {
6999   VP9_COMMON *cm = &cpi->common;
7000   MACROBLOCK *x = &td->mb;
7001   MACROBLOCKD *xd = &x->e_mbd;
7002   const int mb_y_offset =
7003       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
7004   assert(ref_frame != NULL);
7005   set_mv_limits(cm, x, mi_row, mi_col);
7006   {
7007     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
7008     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
7009     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
7010     const int stride = xd->cur_buf->y_stride;
7011     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
7012                              ref_frame_buf, stride, bsize, mi_row, mi_col,
7013                              &mv.as_mv);
7014     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
7015                             bsize, &mv.as_mv);
7016     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
7017   }
7018 }
7019
7020 static void build_motion_field(
7021     VP9_COMP *cpi, int frame_idx,
7022     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
7023   VP9_COMMON *cm = &cpi->common;
7024   ThreadData *td = &cpi->td;
7025   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7026   const int mi_height = num_8x8_blocks_high_lookup[bsize];
7027   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7028   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
7029   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
7030   int mi_row, mi_col;
7031   int rf_idx;
7032
7033   tpl_frame->lambda = (pw * ph) >> 2;
7034   assert(pw * ph == tpl_frame->lambda << 2);
7035
7036   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7037     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
7038         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
7039     if (ref_frame[rf_idx] == NULL) {
7040       continue;
7041     }
7042     vp9_motion_field_reset_mvs(motion_field);
7043     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7044       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7045         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
7046                          bsize, mi_row, mi_col);
7047       }
7048     }
7049   }
7050 }
7051 #endif  // CONFIG_NON_GREEDY_MV
7052
7053 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
7054                               int frame_idx, BLOCK_SIZE bsize) {
7055   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7056   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
7057   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
7058
7059   VP9_COMMON *cm = &cpi->common;
7060   struct scale_factors sf;
7061   int rdmult, idx;
7062   ThreadData *td = &cpi->td;
7063   MACROBLOCK *x = &td->mb;
7064   MACROBLOCKD *xd = &x->e_mbd;
7065   int mi_row, mi_col;
7066
7067 #if CONFIG_VP9_HIGHBITDEPTH
7068   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
7069   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
7070   uint8_t *predictor;
7071 #else
7072   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
7073 #endif
7074   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
7075   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
7076   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
7077   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
7078
7079   const TX_SIZE tx_size = max_txsize_lookup[bsize];
7080   const int mi_height = num_8x8_blocks_high_lookup[bsize];
7081   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7082   int64_t recon_error, sse;
7083 #if CONFIG_NON_GREEDY_MV
7084   int square_block_idx;
7085   int rf_idx;
7086 #endif
7087
7088   // Setup scaling factor
7089 #if CONFIG_VP9_HIGHBITDEPTH
7090   vp9_setup_scale_factors_for_frame(
7091       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
7092       this_frame->y_crop_width, this_frame->y_crop_height,
7093       cpi->common.use_highbitdepth);
7094
7095   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
7096     predictor = CONVERT_TO_BYTEPTR(predictor16);
7097   else
7098     predictor = predictor8;
7099 #else
7100   vp9_setup_scale_factors_for_frame(
7101       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
7102       this_frame->y_crop_width, this_frame->y_crop_height);
7103 #endif  // CONFIG_VP9_HIGHBITDEPTH
7104
7105   // Prepare reference frame pointers. If any reference frame slot is
7106   // unavailable, the pointer will be set to Null.
7107   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
7108     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
7109     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
7110   }
7111
7112   xd->mi = cm->mi_grid_visible;
7113   xd->mi[0] = cm->mi;
7114   xd->cur_buf = this_frame;
7115
7116   // Get rd multiplier set up.
7117   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
7118   set_error_per_bit(&cpi->td.mb, rdmult);
7119   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
7120
7121   tpl_frame->is_valid = 1;
7122
7123   cm->base_qindex = tpl_frame->base_qindex;
7124   vp9_frame_init_quantizer(cpi);
7125
7126 #if CONFIG_NON_GREEDY_MV
7127   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
7128        ++square_block_idx) {
7129     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
7130     build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
7131   }
7132   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7133     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
7134     if (ref_frame_idx != -1) {
7135       MotionField *motion_field = vp9_motion_field_info_get_motion_field(
7136           &cpi->motion_field_info, frame_idx, rf_idx, bsize);
7137       predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
7138                           tpl_frame, rf_idx, bsize);
7139     }
7140   }
7141 #endif
7142
7143   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7144     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7145       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
7146                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
7147                       tx_size, ref_frame, predictor, &recon_error, &sse);
7148       // Motion flow dependency dispenser.
7149       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
7150                       tpl_frame->stride);
7151
7152       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
7153                        bsize);
7154     }
7155   }
7156 }
7157
7158 #if CONFIG_NON_GREEDY_MV
7159 #define DUMP_TPL_STATS 0
7160 #if DUMP_TPL_STATS
// Prints an h x w window of buf to stdout: first a line "h w", then all
// samples of the window in row-major order separated by spaces, then a
// newline. The window's top-left sample is at (row, col) and rows of buf are
// stride samples apart.
static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
  int y;
  printf("%d %d\n", h, w);
  for (y = 0; y < h; ++y) {
    // Index of the first sample of this window row.
    const int row_start = (row + y) * stride + col;
    int x;
    for (x = 0; x < w; ++x) {
      printf("%d ", buf[row_start + x]);
    }
  }
  printf("\n");
}
7171
7172 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
7173   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
7174            frame_buf->y_width);
7175   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
7176            frame_buf->uv_height, frame_buf->uv_width);
7177   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
7178            frame_buf->uv_height, frame_buf->uv_width);
7179 }
7180
7181 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
7182                            const GF_GROUP *gf_group,
7183                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
7184   int frame_idx;
7185   const VP9_COMMON *cm = &cpi->common;
7186   int rf_idx;
7187   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
7188     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7189       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7190       int mi_row, mi_col;
7191       int ref_frame_idx;
7192       const int mi_height = num_8x8_blocks_high_lookup[bsize];
7193       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7194       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
7195       if (ref_frame_idx != -1) {
7196         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
7197         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
7198         const int ref_gf_frame_offset =
7199             gf_group->frame_gop_index[ref_frame_idx];
7200         printf("=\n");
7201         printf(
7202             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
7203             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
7204             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
7205             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
7206         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7207           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7208             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7209               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
7210                                                        frame_idx, rf_idx, bsize,
7211                                                        mi_row, mi_col);
7212               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
7213                      mv.as_mv.col);
7214             }
7215           }
7216         }
7217         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7218           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7219             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7220               const TplDepStats *tpl_ptr =
7221                   &tpl_frame
7222                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
7223               printf("%f ", tpl_ptr->feature_score);
7224             }
7225           }
7226         }
7227         printf("\n");
7228
7229         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7230           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7231             const int mv_mode =
7232                 tpl_frame
7233                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
7234             printf("%d ", mv_mode);
7235           }
7236         }
7237         printf("\n");
7238
7239         dump_frame_buf(gf_picture[frame_idx].frame);
7240         dump_frame_buf(ref_frame_buf);
7241       }
7242     }
7243   }
7244 }
7245 #endif  // DUMP_TPL_STATS
7246 #endif  // CONFIG_NON_GREEDY_MV
7247
7248 static void init_tpl_buffer(VP9_COMP *cpi) {
7249   VP9_COMMON *cm = &cpi->common;
7250   int frame;
7251
7252   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7253   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7254 #if CONFIG_NON_GREEDY_MV
7255   int rf_idx;
7256
7257   vpx_free(cpi->select_mv_arr);
7258   CHECK_MEM_ERROR(
7259       cm, cpi->select_mv_arr,
7260       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
7261 #endif
7262
7263   // TODO(jingning): Reduce the actual memory use for tpl model build up.
7264   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7265     if (cpi->tpl_stats[frame].width >= mi_cols &&
7266         cpi->tpl_stats[frame].height >= mi_rows &&
7267         cpi->tpl_stats[frame].tpl_stats_ptr)
7268       continue;
7269
7270 #if CONFIG_NON_GREEDY_MV
7271     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7272       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7273       CHECK_MEM_ERROR(
7274           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
7275           vpx_calloc(mi_rows * mi_cols * 4,
7276                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
7277       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7278       CHECK_MEM_ERROR(
7279           cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
7280           vpx_calloc(mi_rows * mi_cols * 4,
7281                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
7282     }
7283 #endif
7284     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7285     CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
7286                     vpx_calloc(mi_rows * mi_cols,
7287                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
7288     cpi->tpl_stats[frame].is_valid = 0;
7289     cpi->tpl_stats[frame].width = mi_cols;
7290     cpi->tpl_stats[frame].height = mi_rows;
7291     cpi->tpl_stats[frame].stride = mi_cols;
7292     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
7293     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
7294   }
7295
7296   for (frame = 0; frame < REF_FRAMES; ++frame) {
7297     cpi->enc_frame_buf[frame].mem_valid = 0;
7298     cpi->enc_frame_buf[frame].released = 1;
7299   }
7300 }
7301
7302 static void free_tpl_buffer(VP9_COMP *cpi) {
7303   int frame;
7304 #if CONFIG_NON_GREEDY_MV
7305   vp9_free_motion_field_info(&cpi->motion_field_info);
7306   vpx_free(cpi->select_mv_arr);
7307 #endif
7308   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7309 #if CONFIG_NON_GREEDY_MV
7310     int rf_idx;
7311     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7312       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7313       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7314     }
7315 #endif
7316     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7317     cpi->tpl_stats[frame].is_valid = 0;
7318   }
7319 }
7320
7321 static void setup_tpl_stats(VP9_COMP *cpi) {
7322   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
7323   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7324   int tpl_group_frames = 0;
7325   int frame_idx;
7326   cpi->tpl_bsize = BLOCK_32X32;
7327
7328   init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
7329
7330   init_tpl_stats(cpi);
7331
7332   // Backward propagation from tpl_group_frames to 1.
7333   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
7334     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
7335     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
7336   }
7337 #if CONFIG_NON_GREEDY_MV
7338   cpi->tpl_ready = 1;
7339 #if DUMP_TPL_STATS
7340   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
7341 #endif  // DUMP_TPL_STATS
7342 #endif  // CONFIG_NON_GREEDY_MV
7343 }
7344
7345 #if !CONFIG_REALTIME_ONLY
7346 #if CONFIG_RATE_CTRL
7347 static void copy_frame_counts(const FRAME_COUNTS *input_counts,
7348                               FRAME_COUNTS *output_counts) {
7349   int i, j, k, l, m, n;
7350   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
7351     for (j = 0; j < INTRA_MODES; ++j) {
7352       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
7353     }
7354   }
7355   for (i = 0; i < INTRA_MODES; ++i) {
7356     for (j = 0; j < INTRA_MODES; ++j) {
7357       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
7358     }
7359   }
7360   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
7361     for (j = 0; j < PARTITION_TYPES; ++j) {
7362       output_counts->partition[i][j] = input_counts->partition[i][j];
7363     }
7364   }
7365   for (i = 0; i < TX_SIZES; ++i) {
7366     for (j = 0; j < PLANE_TYPES; ++j) {
7367       for (k = 0; k < REF_TYPES; ++k) {
7368         for (l = 0; l < COEF_BANDS; ++l) {
7369           for (m = 0; m < COEFF_CONTEXTS; ++m) {
7370             output_counts->eob_branch[i][j][k][l][m] =
7371                 input_counts->eob_branch[i][j][k][l][m];
7372             for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) {
7373               output_counts->coef[i][j][k][l][m][n] =
7374                   input_counts->coef[i][j][k][l][m][n];
7375             }
7376           }
7377         }
7378       }
7379     }
7380   }
7381   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
7382     for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
7383       output_counts->switchable_interp[i][j] =
7384           input_counts->switchable_interp[i][j];
7385     }
7386   }
7387   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
7388     for (j = 0; j < INTER_MODES; ++j) {
7389       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
7390     }
7391   }
7392   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
7393     for (j = 0; j < 2; ++j) {
7394       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
7395     }
7396   }
7397   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
7398     for (j = 0; j < 2; ++j) {
7399       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
7400     }
7401   }
7402   for (i = 0; i < REF_CONTEXTS; ++i) {
7403     for (j = 0; j < 2; ++j) {
7404       for (k = 0; k < 2; ++k) {
7405         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
7406       }
7407     }
7408   }
7409   for (i = 0; i < REF_CONTEXTS; ++i) {
7410     for (j = 0; j < 2; ++j) {
7411       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
7412     }
7413   }
7414   for (i = 0; i < SKIP_CONTEXTS; ++i) {
7415     for (j = 0; j < 2; ++j) {
7416       output_counts->skip[i][j] = input_counts->skip[i][j];
7417     }
7418   }
7419   for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
7420     for (j = 0; j < TX_SIZES; j++) {
7421       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
7422     }
7423     for (j = 0; j < TX_SIZES - 1; j++) {
7424       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
7425     }
7426     for (j = 0; j < TX_SIZES - 2; j++) {
7427       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
7428     }
7429   }
7430   for (i = 0; i < TX_SIZES; i++) {
7431     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
7432   }
7433   for (i = 0; i < MV_JOINTS; i++) {
7434     output_counts->mv.joints[i] = input_counts->mv.joints[i];
7435   }
7436   for (k = 0; k < 2; k++) {
7437     nmv_component_counts *const comps = &output_counts->mv.comps[k];
7438     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
7439     for (i = 0; i < 2; i++) {
7440       comps->sign[i] = comps_t->sign[i];
7441       comps->class0_hp[i] = comps_t->class0_hp[i];
7442       comps->hp[i] = comps_t->hp[i];
7443     }
7444     for (i = 0; i < MV_CLASSES; i++) {
7445       comps->classes[i] = comps_t->classes[i];
7446     }
7447     for (i = 0; i < CLASS0_SIZE; i++) {
7448       comps->class0[i] = comps_t->class0[i];
7449       for (j = 0; j < MV_FP_SIZE; j++) {
7450         comps->class0_fp[i][j] = comps_t->class0_fp[i][j];
7451       }
7452     }
7453     for (i = 0; i < MV_OFFSET_BITS; i++) {
7454       for (j = 0; j < 2; j++) {
7455         comps->bits[i][j] = comps_t->bits[i][j];
7456       }
7457     }
7458     for (i = 0; i < MV_FP_SIZE; i++) {
7459       comps->fp[i] = comps_t->fp[i];
7460     }
7461   }
7462 }
7463
7464 static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
7465                                         IMAGE_BUFFER *image_buffer) {
7466   const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,
7467                                    yv12_buffer->v_buffer };
7468   const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,
7469                                  yv12_buffer->uv_stride };
7470   const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,
7471                         yv12_buffer->uv_crop_width };
7472   const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,
7473                         yv12_buffer->uv_crop_height };
7474   int plane;
7475   for (plane = 0; plane < 3; ++plane) {
7476     const int src_stride = src_stride_ls[plane];
7477     const int w = w_ls[plane];
7478     const int h = h_ls[plane];
7479     const uint8_t *src_buf = src_buf_ls[plane];
7480     uint8_t *dst_buf = image_buffer->plane_buffer[plane];
7481     int r;
7482     assert(image_buffer->plane_width[plane] == w);
7483     assert(image_buffer->plane_height[plane] == h);
7484     for (r = 0; r < h; ++r) {
7485       memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);
7486       src_buf += src_stride;
7487       dst_buf += w;
7488     }
7489   }
7490 }
7491 #endif  // CONFIG_RATE_CTRL
7492 static void update_encode_frame_result(
7493     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
7494     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
7495     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
7496     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
7497 #if CONFIG_RATE_CTRL
7498     const PARTITION_INFO *partition_info,
7499     const MOTION_VECTOR_INFO *motion_vector_info,
7500 #endif  // CONFIG_RATE_CTRL
7501     ENCODE_FRAME_RESULT *encode_frame_result) {
7502 #if CONFIG_RATE_CTRL
7503   PSNR_STATS psnr;
7504 #if CONFIG_VP9_HIGHBITDEPTH
7505   vpx_calc_highbd_psnr(source_frame, coded_frame_buf->buf, &psnr, bit_depth,
7506                        input_bit_depth);
7507 #else   // CONFIG_VP9_HIGHBITDEPTH
7508   (void)bit_depth;
7509   (void)input_bit_depth;
7510   vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);
7511 #endif  // CONFIG_VP9_HIGHBITDEPTH
7512   encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
7513
7514   if (update_type != KF_UPDATE) {
7515     const VP9_REFFRAME inter_ref_flags[MAX_INTER_REF_FRAMES] = { VP9_LAST_FLAG,
7516                                                                  VP9_GOLD_FLAG,
7517                                                                  VP9_ALT_FLAG };
7518     int i;
7519     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7520       assert(ref_frame_bufs[i] != NULL);
7521       encode_frame_result->ref_frame_coding_indexes[i] =
7522           ref_frame_bufs[i]->frame_coding_index;
7523       encode_frame_result->ref_frame_valid_list[i] =
7524           (ref_frame_flags & inter_ref_flags[i]) != 0;
7525     }
7526   } else {
7527     // No reference frame is available when this is a key frame.
7528     int i;
7529     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7530       encode_frame_result->ref_frame_coding_indexes[i] = -1;
7531       encode_frame_result->ref_frame_valid_list[i] = 0;
7532     }
7533   }
7534   encode_frame_result->psnr = psnr.psnr[0];
7535   encode_frame_result->sse = psnr.sse[0];
7536   copy_frame_counts(counts, &encode_frame_result->frame_counts);
7537   encode_frame_result->partition_info = partition_info;
7538   encode_frame_result->motion_vector_info = motion_vector_info;
7539   if (encode_frame_result->coded_frame.allocated) {
7540     yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
7541                                 &encode_frame_result->coded_frame);
7542   }
7543 #else   // CONFIG_RATE_CTRL
7544   (void)ref_frame_flags;
7545   (void)bit_depth;
7546   (void)input_bit_depth;
7547   (void)source_frame;
7548   (void)coded_frame_buf;
7549   (void)ref_frame_bufs;
7550   (void)counts;
7551 #endif  // CONFIG_RATE_CTRL
7552   encode_frame_result->show_idx = coded_frame_buf->frame_index;
7553   encode_frame_result->update_type = update_type;
7554   encode_frame_result->quantize_index = quantize_index;
7555 }
7556 #endif  // !CONFIG_REALTIME_ONLY
7557
7558 void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
7559   encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
7560 #if CONFIG_RATE_CTRL
7561   encode_frame_result->frame_coding_index = -1;
7562   vp9_zero(encode_frame_result->coded_frame);
7563   encode_frame_result->coded_frame.allocated = 0;
7564 #endif  // CONFIG_RATE_CTRL
7565 }
7566
7567 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
7568                             size_t *size, uint8_t *dest, int64_t *time_stamp,
7569                             int64_t *time_end, int flush,
7570                             ENCODE_FRAME_RESULT *encode_frame_result) {
7571   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
7572   VP9_COMMON *const cm = &cpi->common;
7573   BufferPool *const pool = cm->buffer_pool;
7574   RATE_CONTROL *const rc = &cpi->rc;
7575   struct vpx_usec_timer cmptimer;
7576   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
7577   struct lookahead_entry *last_source = NULL;
7578   struct lookahead_entry *source = NULL;
7579   int arf_src_index;
7580   const int gf_group_index = cpi->twopass.gf_group.index;
7581   int i;
7582
7583   if (is_one_pass_cbr_svc(cpi)) {
7584     vp9_one_pass_cbr_svc_start_layer(cpi);
7585   }
7586
7587   vpx_usec_timer_start(&cmptimer);
7588
7589   vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
7590
7591   // Is multi-arf enabled.
7592   // Note that at the moment multi_arf is only configured for 2 pass VBR and
7593   // will not work properly with svc.
7594   // Enable the Jingning's new "multi_layer_arf" code if "enable_auto_arf"
7595   // is greater than or equal to 2.
7596   if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
7597     cpi->multi_layer_arf = 1;
7598   else
7599     cpi->multi_layer_arf = 0;
7600
7601   // Normal defaults
7602   cm->reset_frame_context = 0;
7603   cm->refresh_frame_context = 1;
7604   if (!is_one_pass_cbr_svc(cpi)) {
7605     cpi->refresh_last_frame = 1;
7606     cpi->refresh_golden_frame = 0;
7607     cpi->refresh_alt_ref_frame = 0;
7608   }
7609
7610   // Should we encode an arf frame.
7611   arf_src_index = get_arf_src_index(cpi);
7612
7613   if (arf_src_index) {
7614     for (i = 0; i <= arf_src_index; ++i) {
7615       struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
7616       // Avoid creating an alt-ref if there's a forced keyframe pending.
7617       if (e == NULL) {
7618         break;
7619       } else if (e->flags == VPX_EFLAG_FORCE_KF) {
7620         arf_src_index = 0;
7621         flush = 1;
7622         break;
7623       }
7624     }
7625   }
7626
7627   // Clear arf index stack before group of pictures processing starts.
7628   if (gf_group_index == 1) {
7629     stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
7630     cpi->twopass.gf_group.stack_size = 0;
7631   }
7632
7633   if (arf_src_index) {
7634     assert(arf_src_index <= rc->frames_to_key);
7635     if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
7636       cpi->alt_ref_source = source;
7637
7638 #if !CONFIG_REALTIME_ONLY
7639       if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
7640           (oxcf->arnr_strength > 0)) {
7641         int bitrate = cpi->rc.avg_frame_bandwidth / 40;
7642         int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;
7643
7644         int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1);
7645         not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME;
7646
7647         // Produce the filtered ARF frame.
7648         vp9_temporal_filter(cpi, arf_src_index);
7649         vpx_extend_frame_borders(&cpi->alt_ref_buffer);
7650
7651         // for small bitrates segmentation overhead usually
7652         // eats all bitrate gain from enabling delta quantizers
7653         if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
7654           vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);
7655
7656         force_src_buffer = &cpi->alt_ref_buffer;
7657       }
7658 #endif
7659       cm->show_frame = 0;
7660       cm->intra_only = 0;
7661       cpi->refresh_alt_ref_frame = 1;
7662       cpi->refresh_golden_frame = 0;
7663       cpi->refresh_last_frame = 0;
7664       rc->is_src_frame_alt_ref = 0;
7665       rc->source_alt_ref_pending = 0;
7666     } else {
7667       rc->source_alt_ref_pending = 0;
7668     }
7669   }
7670
7671   if (!source) {
7672     // Get last frame source.
7673     if (cm->current_video_frame > 0) {
7674       if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
7675         return -1;
7676     }
7677
7678     // Read in the source frame.
7679     if (cpi->use_svc || cpi->svc.set_intra_only_frame)
7680       source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
7681     else
7682       source = vp9_lookahead_pop(cpi->lookahead, flush);
7683
7684     if (source != NULL) {
7685       cm->show_frame = 1;
7686       cm->intra_only = 0;
7687       // If the flags indicate intra frame, but if the current picture is for
7688       // spatial layer above first_spatial_layer_to_encode, it should not be an
7689       // intra picture.
7690       if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
7691           cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
7692         source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
7693       }
7694
7695       // Check to see if the frame should be encoded as an arf overlay.
7696       check_src_altref(cpi, source);
7697     }
7698   }
7699
7700   if (source) {
7701     cpi->un_scaled_source = cpi->Source =
7702         force_src_buffer ? force_src_buffer : &source->img;
7703
7704 #ifdef ENABLE_KF_DENOISE
7705     // Copy of raw source for metrics calculation.
7706     if (is_psnr_calc_enabled(cpi))
7707       vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
7708 #endif
7709
7710     cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
7711
7712     *time_stamp = source->ts_start;
7713     *time_end = source->ts_end;
7714     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
7715   } else {
7716     *size = 0;
7717     return -1;
7718   }
7719
7720   if (source->ts_start < cpi->first_time_stamp_ever) {
7721     cpi->first_time_stamp_ever = source->ts_start;
7722     cpi->last_end_time_stamp_seen = source->ts_start;
7723   }
7724
7725   // Clear down mmx registers
7726   vpx_clear_system_state();
7727
7728   // adjust frame rates based on timestamps given
7729   if (cm->show_frame) {
7730     if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
7731         cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
7732       vp9_svc_adjust_frame_rate(cpi);
7733     else
7734       adjust_frame_rate(cpi, source);
7735   }
7736
7737   if (is_one_pass_cbr_svc(cpi)) {
7738     vp9_update_temporal_layer_framerate(cpi);
7739     vp9_restore_layer_context(cpi);
7740   }
7741
7742   // Find a free buffer for the new frame, releasing the reference previously
7743   // held.
7744   if (cm->new_fb_idx != INVALID_IDX) {
7745     --pool->frame_bufs[cm->new_fb_idx].ref_count;
7746   }
7747   cm->new_fb_idx = get_free_fb(cm);
7748
7749   if (cm->new_fb_idx == INVALID_IDX) return -1;
7750
7751   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
7752
7753   // Start with a 0 size frame.
7754   *size = 0;
7755
7756   cpi->frame_flags = *frame_flags;
7757
7758 #if !CONFIG_REALTIME_ONLY
7759   if ((oxcf->pass == 2) && !cpi->use_svc) {
7760     vp9_rc_get_second_pass_params(cpi);
7761   } else if (oxcf->pass == 1) {
7762     set_frame_size(cpi);
7763   }
7764 #endif  // !CONFIG_REALTIME_ONLY
7765
7766   if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
7767       cpi->level_constraint.fail_flag == 0)
7768     level_rc_framerate(cpi, arf_src_index);
7769
7770   if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
7771     for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
7772   }
7773
7774   if (cpi->kmeans_data_arr_alloc == 0) {
7775     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7776     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7777 #if CONFIG_MULTITHREAD
7778     pthread_mutex_init(&cpi->kmeans_mutex, NULL);
7779 #endif
7780     CHECK_MEM_ERROR(
7781         cm, cpi->kmeans_data_arr,
7782         vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
7783     cpi->kmeans_data_stride = mi_cols;
7784     cpi->kmeans_data_arr_alloc = 1;
7785   }
7786
7787 #if CONFIG_NON_GREEDY_MV
7788   {
7789     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7790     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7791     Status status = vp9_alloc_motion_field_info(
7792         &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
7793     if (status == STATUS_FAILED) {
7794       vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
7795                          "vp9_alloc_motion_field_info failed");
7796     }
7797   }
7798 #endif  // CONFIG_NON_GREEDY_MV
7799
7800   if (gf_group_index == 1 &&
7801       cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
7802       cpi->sf.enable_tpl_model) {
7803     init_tpl_buffer(cpi);
7804     vp9_estimate_qp_gop(cpi);
7805     setup_tpl_stats(cpi);
7806   }
7807
7808 #if CONFIG_BITSTREAM_DEBUG
7809   assert(cpi->oxcf.max_threads == 0 &&
7810          "bitstream debug tool does not support multithreading");
7811   bitstream_queue_record_write();
7812 #endif
7813 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
7814   bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
7815 #endif
7816
7817   cpi->td.mb.fp_src_pred = 0;
7818 #if CONFIG_REALTIME_ONLY
7819   (void)encode_frame_result;
7820   if (cpi->use_svc) {
7821     SvcEncode(cpi, size, dest, frame_flags);
7822   } else {
7823     // One pass encode
7824     Pass0Encode(cpi, size, dest, frame_flags);
7825   }
7826 #else  // !CONFIG_REALTIME_ONLY
7827   if (oxcf->pass == 1 && !cpi->use_svc) {
7828     const int lossless = is_lossless_requested(oxcf);
7829 #if CONFIG_VP9_HIGHBITDEPTH
7830     if (cpi->oxcf.use_highbitdepth)
7831       cpi->td.mb.fwd_txfm4x4 =
7832           lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
7833     else
7834       cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7835     cpi->td.mb.highbd_inv_txfm_add =
7836         lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
7837 #else
7838     cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7839 #endif  // CONFIG_VP9_HIGHBITDEPTH
7840     cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
7841     vp9_first_pass(cpi, source);
7842   } else if (oxcf->pass == 2 && !cpi->use_svc) {
7843     Pass2Encode(cpi, size, dest, frame_flags, encode_frame_result);
7844     vp9_twopass_postencode_update(cpi);
7845   } else if (cpi->use_svc) {
7846     SvcEncode(cpi, size, dest, frame_flags);
7847   } else {
7848     // One pass encode
7849     Pass0Encode(cpi, size, dest, frame_flags);
7850   }
7851 #endif  // CONFIG_REALTIME_ONLY
7852
7853   if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
7854
7855   if (cm->refresh_frame_context)
7856     cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
7857
7858   // No frame encoded, or frame was dropped, release scaled references.
7859   if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
7860     release_scaled_references(cpi);
7861   }
7862
7863   if (*size > 0) {
7864     cpi->droppable = !frame_is_reference(cpi);
7865   }
7866
7867   // Save layer specific state.
7868   if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 ||
7869                                     cpi->svc.number_spatial_layers > 1) &&
7870                                    oxcf->pass == 2)) {
7871     vp9_save_layer_context(cpi);
7872   }
7873
7874   vpx_usec_timer_mark(&cmptimer);
7875   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
7876
7877   if (cpi->keep_level_stats && oxcf->pass != 1)
7878     update_level_info(cpi, size, arf_src_index);
7879
7880 #if CONFIG_INTERNAL_STATS
7881
7882   if (oxcf->pass != 1) {
7883     double samples = 0.0;
7884     cpi->bytes += (int)(*size);
7885
7886     if (cm->show_frame) {
7887       uint32_t bit_depth = 8;
7888       uint32_t in_bit_depth = 8;
7889       cpi->count++;
7890 #if CONFIG_VP9_HIGHBITDEPTH
7891       if (cm->use_highbitdepth) {
7892         in_bit_depth = cpi->oxcf.input_bit_depth;
7893         bit_depth = cm->bit_depth;
7894       }
7895 #endif
7896
7897       if (cpi->b_calculate_psnr) {
7898         YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
7899         YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
7900         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
7901         PSNR_STATS psnr;
7902 #if CONFIG_VP9_HIGHBITDEPTH
7903         vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
7904                              in_bit_depth);
7905 #else
7906         vpx_calc_psnr(orig, recon, &psnr);
7907 #endif  // CONFIG_VP9_HIGHBITDEPTH
7908
7909         adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
7910                           psnr.psnr[0], &cpi->psnr);
7911         cpi->total_sq_error += psnr.sse[0];
7912         cpi->total_samples += psnr.samples[0];
7913         samples = psnr.samples[0];
7914
7915         {
7916           PSNR_STATS psnr2;
7917           double frame_ssim2 = 0, weight = 0;
7918 #if CONFIG_VP9_POSTPROC
7919           if (vpx_alloc_frame_buffer(
7920                   pp, recon->y_crop_width, recon->y_crop_height,
7921                   cm->subsampling_x, cm->subsampling_y,
7922 #if CONFIG_VP9_HIGHBITDEPTH
7923                   cm->use_highbitdepth,
7924 #endif
7925                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) {
7926             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
7927                                "Failed to allocate post processing buffer");
7928           }
7929           {
7930             vp9_ppflags_t ppflags;
7931             ppflags.post_proc_flag = VP9D_DEBLOCK;
7932             ppflags.deblocking_level = 0;  // not used in vp9_post_proc_frame()
7933             ppflags.noise_level = 0;       // not used in vp9_post_proc_frame()
7934             vp9_post_proc_frame(cm, pp, &ppflags,
7935                                 cpi->un_scaled_source->y_width);
7936           }
7937 #endif
7938           vpx_clear_system_state();
7939
7940 #if CONFIG_VP9_HIGHBITDEPTH
7941           vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
7942                                cpi->oxcf.input_bit_depth);
7943 #else
7944           vpx_calc_psnr(orig, pp, &psnr2);
7945 #endif  // CONFIG_VP9_HIGHBITDEPTH
7946
7947           cpi->totalp_sq_error += psnr2.sse[0];
7948           cpi->totalp_samples += psnr2.samples[0];
7949           adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
7950                             psnr2.psnr[0], &cpi->psnrp);
7951
7952 #if CONFIG_VP9_HIGHBITDEPTH
7953           if (cm->use_highbitdepth) {
7954             frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth,
7955                                                in_bit_depth);
7956           } else {
7957             frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7958           }
7959 #else
7960           frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7961 #endif  // CONFIG_VP9_HIGHBITDEPTH
7962
7963           cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
7964           cpi->summed_quality += frame_ssim2 * weight;
7965           cpi->summed_weights += weight;
7966
7967 #if CONFIG_VP9_HIGHBITDEPTH
7968           if (cm->use_highbitdepth) {
7969             frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth,
7970                                                in_bit_depth);
7971           } else {
7972             frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7973           }
7974 #else
7975           frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7976 #endif  // CONFIG_VP9_HIGHBITDEPTH
7977
7978           cpi->summedp_quality += frame_ssim2 * weight;
7979           cpi->summedp_weights += weight;
7980 #if 0
7981           if (cm->show_frame) {
7982             FILE *f = fopen("q_used.stt", "a");
7983             fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
7984                     cpi->common.current_video_frame, psnr2.psnr[1],
7985                     psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
7986             fclose(f);
7987           }
7988 #endif
7989         }
7990       }
7991       if (cpi->b_calculate_blockiness) {
7992 #if CONFIG_VP9_HIGHBITDEPTH
7993         if (!cm->use_highbitdepth)
7994 #endif
7995         {
7996           double frame_blockiness = vp9_get_blockiness(
7997               cpi->Source->y_buffer, cpi->Source->y_stride,
7998               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
7999               cpi->Source->y_width, cpi->Source->y_height);
8000           cpi->worst_blockiness =
8001               VPXMAX(cpi->worst_blockiness, frame_blockiness);
8002           cpi->total_blockiness += frame_blockiness;
8003         }
8004       }
8005
8006       if (cpi->b_calculate_consistency) {
8007 #if CONFIG_VP9_HIGHBITDEPTH
8008         if (!cm->use_highbitdepth)
8009 #endif
8010         {
8011           double this_inconsistency = vpx_get_ssim_metrics(
8012               cpi->Source->y_buffer, cpi->Source->y_stride,
8013               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
8014               cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
8015               &cpi->metrics, 1);
8016
8017           const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
8018           double consistency =
8019               vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency);
8020           if (consistency > 0.0)
8021             cpi->worst_consistency =
8022                 VPXMIN(cpi->worst_consistency, consistency);
8023           cpi->total_inconsistency += this_inconsistency;
8024         }
8025       }
8026
8027       {
8028         double y, u, v, frame_all;
8029         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
8030                                       &v, bit_depth, in_bit_depth);
8031         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
8032       }
8033       {
8034         double y, u, v, frame_all;
8035         frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
8036                                 bit_depth, in_bit_depth);
8037         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
8038       }
8039     }
8040   }
8041
8042 #endif
8043
8044   if (is_one_pass_cbr_svc(cpi)) {
8045     if (cm->show_frame) {
8046       ++cpi->svc.spatial_layer_to_encode;
8047       if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
8048         cpi->svc.spatial_layer_to_encode = 0;
8049     }
8050   }
8051
8052   vpx_clear_system_state();
8053   return 0;
8054 }
8055
8056 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
8057                               vp9_ppflags_t *flags) {
8058   VP9_COMMON *cm = &cpi->common;
8059 #if !CONFIG_VP9_POSTPROC
8060   (void)flags;
8061 #endif
8062
8063   if (!cm->show_frame) {
8064     return -1;
8065   } else {
8066     int ret;
8067 #if CONFIG_VP9_POSTPROC
8068     ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
8069 #else
8070     if (cm->frame_to_show) {
8071       *dest = *cm->frame_to_show;
8072       dest->y_width = cm->width;
8073       dest->y_height = cm->height;
8074       dest->uv_width = cm->width >> cm->subsampling_x;
8075       dest->uv_height = cm->height >> cm->subsampling_y;
8076       ret = 0;
8077     } else {
8078       ret = -1;
8079     }
8080 #endif  // !CONFIG_VP9_POSTPROC
8081     vpx_clear_system_state();
8082     return ret;
8083   }
8084 }
8085
8086 int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
8087                           VPX_SCALING vert_mode) {
8088   VP9_COMMON *cm = &cpi->common;
8089   int hr = 0, hs = 0, vr = 0, vs = 0;
8090
8091   if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
8092
8093   Scale2Ratio(horiz_mode, &hr, &hs);
8094   Scale2Ratio(vert_mode, &vr, &vs);
8095
8096   // always go to the next whole number
8097   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
8098   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
8099   if (cm->current_video_frame) {
8100     assert(cm->width <= cpi->initial_width);
8101     assert(cm->height <= cpi->initial_height);
8102   }
8103
8104   update_frame_size(cpi);
8105
8106   return 0;
8107 }
8108
8109 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
8110                          unsigned int height) {
8111   VP9_COMMON *cm = &cpi->common;
8112 #if CONFIG_VP9_HIGHBITDEPTH
8113   update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
8114 #else
8115   update_initial_width(cpi, 0, 1, 1);
8116 #endif  // CONFIG_VP9_HIGHBITDEPTH
8117
8118 #if CONFIG_VP9_TEMPORAL_DENOISING
8119   setup_denoiser_buffer(cpi);
8120 #endif
8121   alloc_raw_frame_buffers(cpi);
8122   if (width) {
8123     cm->width = width;
8124     if (cm->width > cpi->initial_width) {
8125       cm->width = cpi->initial_width;
8126       printf("Warning: Desired width too large, changed to %d\n", cm->width);
8127     }
8128   }
8129
8130   if (height) {
8131     cm->height = height;
8132     if (cm->height > cpi->initial_height) {
8133       cm->height = cpi->initial_height;
8134       printf("Warning: Desired height too large, changed to %d\n", cm->height);
8135     }
8136   }
8137   assert(cm->width <= cpi->initial_width);
8138   assert(cm->height <= cpi->initial_height);
8139
8140   update_frame_size(cpi);
8141
8142   return 0;
8143 }
8144
8145 void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
8146   cpi->use_svc = use_svc;
8147   return;
8148 }
8149
8150 int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
8151
8152 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
8153   if (flags &
8154       (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) {
8155     int ref = 7;
8156
8157     if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG;
8158
8159     if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG;
8160
8161     if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG;
8162
8163     vp9_use_as_reference(cpi, ref);
8164   }
8165
8166   if (flags &
8167       (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
8168        VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) {
8169     int upd = 7;
8170
8171     if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG;
8172
8173     if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
8174
8175     if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG;
8176
8177     vp9_update_reference(cpi, upd);
8178   }
8179
8180   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
8181     vp9_update_entropy(cpi, 0);
8182   }
8183 }
8184
8185 void vp9_set_row_mt(VP9_COMP *cpi) {
8186   // Enable row based multi-threading for supported modes of encoding
8187   cpi->row_mt = 0;
8188   if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
8189        cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
8190       cpi->oxcf.row_mt && !cpi->use_svc)
8191     cpi->row_mt = 1;
8192
8193   if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
8194       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
8195       !cpi->use_svc)
8196     cpi->row_mt = 1;
8197
8198   // In realtime mode, enable row based multi-threading for all the speed levels
8199   // where non-rd path is used.
8200   if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) {
8201     cpi->row_mt = 1;
8202   }
8203
8204   if (cpi->row_mt)
8205     cpi->row_mt_bit_exact = 1;
8206   else
8207     cpi->row_mt_bit_exact = 0;
8208 }