granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encoder.c

   1 /*
   2  * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <limits.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_config.h"
  18 #include "./vpx_dsp_rtcd.h"
  19 #include "./vpx_scale_rtcd.h"
  20 #include "vpx_dsp/psnr.h"
  21 #include "vpx_dsp/vpx_dsp_common.h"
  22 #include "vpx_dsp/vpx_filter.h"
  23 #if CONFIG_INTERNAL_STATS
  24 #include "vpx_dsp/ssim.h"
  25 #endif
  26 #include "vpx_ports/mem.h"
  27 #include "vpx_ports/system_state.h"
  28 #include "vpx_ports/vpx_timer.h"
  29 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  30 #include "vpx_util/vpx_debug_util.h"
  31 #endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  32
  33 #include "vp9/common/vp9_alloccommon.h"
  34 #include "vp9/common/vp9_filter.h"
  35 #include "vp9/common/vp9_idct.h"
  36 #if CONFIG_NON_GREEDY_MV
  37 #include "vp9/common/vp9_mvref_common.h"
  38 #endif
  39 #if CONFIG_VP9_POSTPROC
  40 #include "vp9/common/vp9_postproc.h"
  41 #endif
  42 #include "vp9/common/vp9_reconinter.h"
  43 #include "vp9/common/vp9_reconintra.h"
  44 #include "vp9/common/vp9_tile_common.h"
  45 #include "vp9/common/vp9_scan.h"
  46
  47 #if !CONFIG_REALTIME_ONLY
  48 #include "vp9/encoder/vp9_alt_ref_aq.h"
  49 #include "vp9/encoder/vp9_aq_360.h"
  50 #include "vp9/encoder/vp9_aq_complexity.h"
  51 #endif
  52 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  53 #if !CONFIG_REALTIME_ONLY
  54 #include "vp9/encoder/vp9_aq_variance.h"
  55 #endif
  56 #include "vp9/encoder/vp9_bitstream.h"
  57 #if CONFIG_INTERNAL_STATS
  58 #include "vp9/encoder/vp9_blockiness.h"
  59 #endif
  60 #include "vp9/encoder/vp9_context_tree.h"
  61 #include "vp9/encoder/vp9_encodeframe.h"
  62 #include "vp9/encoder/vp9_encodemb.h"
  63 #include "vp9/encoder/vp9_encodemv.h"
  64 #include "vp9/encoder/vp9_encoder.h"
  65 #include "vp9/encoder/vp9_ethread.h"
  66 #include "vp9/encoder/vp9_extend.h"
  67 #include "vp9/encoder/vp9_firstpass.h"
  68 #include "vp9/encoder/vp9_mbgraph.h"
  69 #if CONFIG_NON_GREEDY_MV
  70 #include "vp9/encoder/vp9_mcomp.h"
  71 #endif
  72 #include "vp9/encoder/vp9_multi_thread.h"
  73 #include "vp9/encoder/vp9_noise_estimate.h"
  74 #include "vp9/encoder/vp9_picklpf.h"
  75 #include "vp9/encoder/vp9_ratectrl.h"
  76 #include "vp9/encoder/vp9_rd.h"
  77 #include "vp9/encoder/vp9_resize.h"
  78 #include "vp9/encoder/vp9_segmentation.h"
  79 #include "vp9/encoder/vp9_skin_detection.h"
  80 #include "vp9/encoder/vp9_speed_features.h"
  81 #include "vp9/encoder/vp9_svc_layercontext.h"
  82 #include "vp9/encoder/vp9_temporal_filter.h"
  83 #include "vp9/vp9_cx_iface.h"
  84
  85 #define AM_SEGMENT_ID_INACTIVE 7
  86 #define AM_SEGMENT_ID_ACTIVE 0
  87
  88 // Whether to use high precision mv for altref computation.
  89 #define ALTREF_HIGH_PRECISION_MV 1
  90
  91 // Q threshold for high precision mv. Choose a very high value for now so that
  92 // HIGH_PRECISION is always chosen.
  93 #define HIGH_PRECISION_MV_QTHRESH 200
  94
  95 #define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
  96 #define FRAME_RATE_FACTOR 8
  97
  98 #ifdef OUTPUT_YUV_DENOISED
  99 FILE *yuv_denoised_file = NULL;
 100 #endif
 101 #ifdef OUTPUT_YUV_SKINMAP
 102 static FILE *yuv_skinmap_file = NULL;
 103 #endif
 104 #ifdef OUTPUT_YUV_REC
 105 FILE *yuv_rec_file;
 106 #endif
 107 #ifdef OUTPUT_YUV_SVC_SRC
 108 FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
 109 #endif
 110
 111 #if 0
 112 FILE *framepsnr;
 113 FILE *kf_list;
 114 FILE *keyfile;
 115 #endif
 116
 117 #ifdef ENABLE_KF_DENOISE
 118 // Test condition for spatial denoise of source.
 119 static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 120   VP9_COMMON *const cm = &cpi->common;
 121   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 122
 123   return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) &&
 124          frame_is_intra_only(cm);
 125 }
 126 #endif
 127
 128 #if CONFIG_VP9_HIGHBITDEPTH
 129 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 130                          TX_SIZE tx_size);
 131 #endif
 132 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 133                   TX_SIZE tx_size);
 134
 135 #if !CONFIG_REALTIME_ONLY
 136 // compute adaptive threshold for skip recoding
 137 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
 138   const VP9_COMMON *const cm = &cpi->common;
 139   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 140   const int frame_size = (cm->width * cm->height) >> 10;
 141   const int bitrate = (int)(oxcf->target_bandwidth >> 10);
 142   const int qindex_factor = cm->base_qindex + (MAXQ >> 1);
 143
 144   // This equation makes the threshold adaptive to frame size.
 145   // Coding gain obtained by recoding comes from alternate frames of large
 146   // content change. We skip recoding if the difference of previous and current
 147   // frame context probability model is less than a certain threshold.
 148   // The first component is the most critical part to guarantee adaptivity.
 149   // Other parameters are estimated based on normal setting of hd resolution
 150   // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50
 151   const int thresh =
 152       ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) *
 153        qindex_factor) >>
 154       9;
 155
 156   return thresh;
 157 }
 158
 159 // compute the total cost difference between current
 160 // and previous frame context prob model.
 161 static int compute_context_model_diff(const VP9_COMMON *const cm) {
 162   const FRAME_CONTEXT *const pre_fc =
 163       &cm->frame_contexts[cm->frame_context_idx];
 164   const FRAME_CONTEXT *const cur_fc = cm->fc;
 165   const FRAME_COUNTS *counts = &cm->counts;
 166   vpx_prob pre_last_prob, cur_last_prob;
 167   int diff = 0;
 168   int i, j, k, l, m, n;
 169
 170   // y_mode_prob
 171   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
 172     for (j = 0; j < INTRA_MODES - 1; ++j) {
 173       diff += (int)counts->y_mode[i][j] *
 174               (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]);
 175     }
 176     pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2];
 177     cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2];
 178
 179     diff += (int)counts->y_mode[i][INTRA_MODES - 1] *
 180             (pre_last_prob - cur_last_prob);
 181   }
 182
 183   // uv_mode_prob
 184   for (i = 0; i < INTRA_MODES; ++i) {
 185     for (j = 0; j < INTRA_MODES - 1; ++j) {
 186       diff += (int)counts->uv_mode[i][j] *
 187               (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]);
 188     }
 189     pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2];
 190     cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2];
 191
 192     diff += (int)counts->uv_mode[i][INTRA_MODES - 1] *
 193             (pre_last_prob - cur_last_prob);
 194   }
 195
 196   // partition_prob
 197   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
 198     for (j = 0; j < PARTITION_TYPES - 1; ++j) {
 199       diff += (int)counts->partition[i][j] *
 200               (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]);
 201     }
 202     pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2];
 203     cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2];
 204
 205     diff += (int)counts->partition[i][PARTITION_TYPES - 1] *
 206             (pre_last_prob - cur_last_prob);
 207   }
 208
 209   // coef_probs
 210   for (i = 0; i < TX_SIZES; ++i) {
 211     for (j = 0; j < PLANE_TYPES; ++j) {
 212       for (k = 0; k < REF_TYPES; ++k) {
 213         for (l = 0; l < COEF_BANDS; ++l) {
 214           for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) {
 215             for (n = 0; n < UNCONSTRAINED_NODES; ++n) {
 216               diff += (int)counts->coef[i][j][k][l][m][n] *
 217                       (pre_fc->coef_probs[i][j][k][l][m][n] -
 218                        cur_fc->coef_probs[i][j][k][l][m][n]);
 219             }
 220
 221             pre_last_prob =
 222                 MAX_PROB -
 223                 pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 224             cur_last_prob =
 225                 MAX_PROB -
 226                 cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 227
 228             diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] *
 229                     (pre_last_prob - cur_last_prob);
 230           }
 231         }
 232       }
 233     }
 234   }
 235
 236   // switchable_interp_prob
 237   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
 238     for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) {
 239       diff += (int)counts->switchable_interp[i][j] *
 240               (pre_fc->switchable_interp_prob[i][j] -
 241                cur_fc->switchable_interp_prob[i][j]);
 242     }
 243     pre_last_prob =
 244         MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 245     cur_last_prob =
 246         MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 247
 248     diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] *
 249             (pre_last_prob - cur_last_prob);
 250   }
 251
 252   // inter_mode_probs
 253   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
 254     for (j = 0; j < INTER_MODES - 1; ++j) {
 255       diff += (int)counts->inter_mode[i][j] *
 256               (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]);
 257     }
 258     pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2];
 259     cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2];
 260
 261     diff += (int)counts->inter_mode[i][INTER_MODES - 1] *
 262             (pre_last_prob - cur_last_prob);
 263   }
 264
 265   // intra_inter_prob
 266   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
 267     diff += (int)counts->intra_inter[i][0] *
 268             (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]);
 269
 270     pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i];
 271     cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i];
 272
 273     diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob);
 274   }
 275
 276   // comp_inter_prob
 277   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
 278     diff += (int)counts->comp_inter[i][0] *
 279             (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]);
 280
 281     pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i];
 282     cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i];
 283
 284     diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob);
 285   }
 286
 287   // single_ref_prob
 288   for (i = 0; i < REF_CONTEXTS; ++i) {
 289     for (j = 0; j < 2; ++j) {
 290       diff += (int)counts->single_ref[i][j][0] *
 291               (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]);
 292
 293       pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j];
 294       cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j];
 295
 296       diff +=
 297           (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob);
 298     }
 299   }
 300
 301   // comp_ref_prob
 302   for (i = 0; i < REF_CONTEXTS; ++i) {
 303     diff += (int)counts->comp_ref[i][0] *
 304             (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]);
 305
 306     pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i];
 307     cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i];
 308
 309     diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob);
 310   }
 311
 312   // tx_probs
 313   for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
 314     // p32x32
 315     for (j = 0; j < TX_SIZES - 1; ++j) {
 316       diff += (int)counts->tx.p32x32[i][j] *
 317               (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]);
 318     }
 319     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 320     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 321
 322     diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] *
 323             (pre_last_prob - cur_last_prob);
 324
 325     // p16x16
 326     for (j = 0; j < TX_SIZES - 2; ++j) {
 327       diff += (int)counts->tx.p16x16[i][j] *
 328               (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]);
 329     }
 330     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 331     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 332
 333     diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] *
 334             (pre_last_prob - cur_last_prob);
 335
 336     // p8x8
 337     for (j = 0; j < TX_SIZES - 3; ++j) {
 338       diff += (int)counts->tx.p8x8[i][j] *
 339               (pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]);
 340     }
 341     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 342     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 343
 344     diff +=
 345         (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob);
 346   }
 347
 348   // skip_probs
 349   for (i = 0; i < SKIP_CONTEXTS; ++i) {
 350     diff += (int)counts->skip[i][0] *
 351             (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]);
 352
 353     pre_last_prob = MAX_PROB - pre_fc->skip_probs[i];
 354     cur_last_prob = MAX_PROB - cur_fc->skip_probs[i];
 355
 356     diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob);
 357   }
 358
 359   // mv
 360   for (i = 0; i < MV_JOINTS - 1; ++i) {
 361     diff += (int)counts->mv.joints[i] *
 362             (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]);
 363   }
 364   pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2];
 365   cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2];
 366
 367   diff +=
 368       (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob);
 369
 370   for (i = 0; i < 2; ++i) {
 371     const nmv_component_counts *nmv_count = &counts->mv.comps[i];
 372     const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i];
 373     const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i];
 374
 375     // sign
 376     diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign);
 377
 378     pre_last_prob = MAX_PROB - pre_nmv_prob->sign;
 379     cur_last_prob = MAX_PROB - cur_nmv_prob->sign;
 380
 381     diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob);
 382
 383     // classes
 384     for (j = 0; j < MV_CLASSES - 1; ++j) {
 385       diff += (int)nmv_count->classes[j] *
 386               (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]);
 387     }
 388     pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2];
 389     cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2];
 390
 391     diff += (int)nmv_count->classes[MV_CLASSES - 1] *
 392             (pre_last_prob - cur_last_prob);
 393
 394     // class0
 395     for (j = 0; j < CLASS0_SIZE - 1; ++j) {
 396       diff += (int)nmv_count->class0[j] *
 397               (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]);
 398     }
 399     pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2];
 400     cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2];
 401
 402     diff += (int)nmv_count->class0[CLASS0_SIZE - 1] *
 403             (pre_last_prob - cur_last_prob);
 404
 405     // bits
 406     for (j = 0; j < MV_OFFSET_BITS; ++j) {
 407       diff += (int)nmv_count->bits[j][0] *
 408               (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]);
 409
 410       pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j];
 411       cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j];
 412
 413       diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob);
 414     }
 415
 416     // class0_fp
 417     for (j = 0; j < CLASS0_SIZE; ++j) {
 418       for (k = 0; k < MV_FP_SIZE - 1; ++k) {
 419         diff += (int)nmv_count->class0_fp[j][k] *
 420                 (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]);
 421       }
 422       pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 423       cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 424
 425       diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] *
 426               (pre_last_prob - cur_last_prob);
 427     }
 428
 429     // fp
 430     for (j = 0; j < MV_FP_SIZE - 1; ++j) {
 431       diff +=
 432           (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]);
 433     }
 434     pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2];
 435     cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2];
 436
 437     diff +=
 438         (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob);
 439
 440     // class0_hp
 441     diff += (int)nmv_count->class0_hp[0] *
 442             (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp);
 443
 444     pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp;
 445     cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp;
 446
 447     diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob);
 448
 449     // hp
 450     diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp);
 451
 452     pre_last_prob = MAX_PROB - pre_nmv_prob->hp;
 453     cur_last_prob = MAX_PROB - cur_nmv_prob->hp;
 454
 455     diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob);
 456   }
 457
 458   return -diff;
 459 }
 460 #endif  // !CONFIG_REALTIME_ONLY
 461
 462 // Test for whether to calculate metrics for the frame.
 463 static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
 464   const VP9_COMMON *const cm = &cpi->common;
 465   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 466
 467   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
 468 }
 469
 470 /* clang-format off */
// Per-level limits, one row per VP9 level in ascending order. vp9_get_level()
// scans this table from the top and reports the first level whose limits are
// all satisfied.
// Columns after the level id: luma sample rate, luma picture size, luma
// picture breadth, average bitrate, CPB size.
// NOTE(review): the four trailing columns are presumably compression ratio,
// max column tiles, min alt-ref distance and max reference frame buffers
// (the order in which vp9_get_level() checks them) -- confirm against the
// Vp9LevelSpec declaration.
const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
  //         sample rate    size   breadth  bitrate  cpb
  { LEVEL_1,   829440,      36864,    512,   200,    400,    2, 1,  4,  8 },
  { LEVEL_1_1, 2764800,     73728,    768,   800,    1000,   2, 1,  4,  8 },
  { LEVEL_2,   4608000,     122880,   960,   1800,   1500,   2, 1,  4,  8 },
  { LEVEL_2_1, 9216000,     245760,   1344,  3600,   2800,   2, 2,  4,  8 },
  { LEVEL_3,   20736000,    552960,   2048,  7200,   6000,   2, 4,  4,  8 },
  { LEVEL_3_1, 36864000,    983040,   2752,  12000,  10000,  2, 4,  4,  8 },
  { LEVEL_4,   83558400,    2228224,  4160,  18000,  16000,  4, 4,  4,  8 },
  { LEVEL_4_1, 160432128,   2228224,  4160,  30000,  18000,  4, 4,  5,  6 },
  { LEVEL_5,   311951360,   8912896,  8384,  60000,  36000,  6, 8,  6,  4 },
  { LEVEL_5_1, 588251136,   8912896,  8384,  120000, 46000,  8, 8,  10, 4 },
  // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
  // they are finalized (currently tentative).
  { LEVEL_5_2, 1176502272,  8912896,  8384,  180000, 90000,  8, 8,  10, 4 },
  { LEVEL_6,   1176502272,  35651584, 16832, 180000, 90000,  8, 16, 10, 4 },
  { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
  { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
};
 490 /* clang-format on */
 491
// Human-readable explanations for failing to meet a target level, one string
// per failure id (TARGET_LEVEL_FAIL_IDS entries in total).
// NOTE(review): presumably indexed by the target-level failure id enum, so the
// order here must match that enum -- confirm against its declaration.
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
  "The average bit-rate is too high.",
  "The picture size is too large.",
  "The picture width/height is too large.",
  "The luma sample rate is too large.",
  "The CPB size is too large.",
  "The compression ratio is too small",
  "Too many column tiles are used.",
  "The alt-ref distance is too small.",
  "Too many reference buffers are used."
};
 503
 504 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
 505   switch (mode) {
 506     case NORMAL:
 507       *hr = 1;
 508       *hs = 1;
 509       break;
 510     case FOURFIVE:
 511       *hr = 4;
 512       *hs = 5;
 513       break;
 514     case THREEFIVE:
 515       *hr = 3;
 516       *hs = 5;
 517       break;
 518     default:
 519       assert(mode == ONETWO);
 520       *hr = 1;
 521       *hs = 2;
 522       break;
 523   }
 524 }
 525
 526 // Mark all inactive blocks as active. Other segmentation features may be set
 527 // so memset cannot be used, instead only inactive blocks should be reset.
 528 static void suppress_active_map(VP9_COMP *cpi) {
 529   unsigned char *const seg_map = cpi->segmentation_map;
 530
 531   if (cpi->active_map.enabled || cpi->active_map.update) {
 532     const int rows = cpi->common.mi_rows;
 533     const int cols = cpi->common.mi_cols;
 534     int i;
 535
 536     for (i = 0; i < rows * cols; ++i)
 537       if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
 538         seg_map[i] = AM_SEGMENT_ID_ACTIVE;
 539   }
 540 }
 541
 542 static void apply_active_map(VP9_COMP *cpi) {
 543   struct segmentation *const seg = &cpi->common.seg;
 544   unsigned char *const seg_map = cpi->segmentation_map;
 545   const unsigned char *const active_map = cpi->active_map.map;
 546   int i;
 547
 548   assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
 549
 550   if (frame_is_intra_only(&cpi->common)) {
 551     cpi->active_map.enabled = 0;
 552     cpi->active_map.update = 1;
 553   }
 554
 555   if (cpi->active_map.update) {
 556     if (cpi->active_map.enabled) {
 557       for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
 558         if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
 559       vp9_enable_segmentation(seg);
 560       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 561       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 562       // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
 563       // filter level being zero regardless of the value of seg->abs_delta.
 564       vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
 565                       -MAX_LOOP_FILTER);
 566     } else {
 567       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 568       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 569       if (seg->enabled) {
 570         seg->update_data = 1;
 571         seg->update_map = 1;
 572       }
 573     }
 574     cpi->active_map.update = 0;
 575   }
 576 }
 577
 578 static void apply_roi_map(VP9_COMP *cpi) {
 579   VP9_COMMON *cm = &cpi->common;
 580   struct segmentation *const seg = &cm->seg;
 581   vpx_roi_map_t *roi = &cpi->roi;
 582   const int *delta_q = roi->delta_q;
 583   const int *delta_lf = roi->delta_lf;
 584   const int *skip = roi->skip;
 585   int ref_frame[8];
 586   int internal_delta_q[MAX_SEGMENTS];
 587   int i;
 588   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
 589                                     VP9_ALT_FLAG };
 590
 591   // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
 592   // realtime mode.
 593   if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
 594   if (!roi->enabled) return;
 595
 596   memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
 597
 598   vp9_enable_segmentation(seg);
 599   vp9_clearall_segfeatures(seg);
 600   // Select delta coding method;
 601   seg->abs_delta = SEGMENT_DELTADATA;
 602
 603   memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
 604
 605   for (i = 0; i < MAX_SEGMENTS; ++i) {
 606     // Translate the external delta q values to internal values.
 607     internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
 608     if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
 609     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
 610     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
 611     if (internal_delta_q[i] != 0) {
 612       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
 613       vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
 614     }
 615     if (delta_lf[i] != 0) {
 616       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
 617       vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
 618     }
 619     if (skip[i] != 0) {
 620       vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
 621       vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
 622     }
 623     if (ref_frame[i] >= 0) {
 624       int valid_ref = 1;
 625       // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
 626       if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
 627         valid_ref = 0;
 628       // If GOLDEN is selected, make sure it's set as reference.
 629       if (ref_frame[i] == GOLDEN_FRAME &&
 630           !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
 631         valid_ref = 0;
 632       }
 633       // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
 634       // same reference.
 635       if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
 636         ref_frame[i] = LAST_FRAME;
 637       if (valid_ref) {
 638         vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
 639         vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
 640       }
 641     }
 642   }
 643   roi->enabled = 1;
 644 }
 645
 646 static void init_level_info(Vp9LevelInfo *level_info) {
 647   Vp9LevelStats *const level_stats = &level_info->level_stats;
 648   Vp9LevelSpec *const level_spec = &level_info->level_spec;
 649
 650   memset(level_stats, 0, sizeof(*level_stats));
 651   memset(level_spec, 0, sizeof(*level_spec));
 652   level_spec->level = LEVEL_UNKNOWN;
 653   level_spec->min_altref_distance = INT_MAX;
 654 }
 655
 656 static int check_seg_range(int seg_data[8], int range) {
 657   return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
 658            abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
 659            abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
 660            abs(seg_data[6]) > range || abs(seg_data[7]) > range);
 661 }
 662
 663 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
 664   int i;
 665   const Vp9LevelSpec *this_level;
 666
 667   vpx_clear_system_state();
 668
 669   for (i = 0; i < VP9_LEVELS; ++i) {
 670     this_level = &vp9_level_defs[i];
 671     if ((double)level_spec->max_luma_sample_rate >
 672             (double)this_level->max_luma_sample_rate *
 673                 (1 + SAMPLE_RATE_GRACE_P) ||
 674         level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
 675         level_spec->max_luma_picture_breadth >
 676             this_level->max_luma_picture_breadth ||
 677         level_spec->average_bitrate > this_level->average_bitrate ||
 678         level_spec->max_cpb_size > this_level->max_cpb_size ||
 679         level_spec->compression_ratio < this_level->compression_ratio ||
 680         level_spec->max_col_tiles > this_level->max_col_tiles ||
 681         level_spec->min_altref_distance < this_level->min_altref_distance ||
 682         level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
 683       continue;
 684     break;
 685   }
 686   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 687 }
 688
 689 int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
 690                     unsigned int cols, int delta_q[8], int delta_lf[8],
 691                     int skip[8], int ref_frame[8]) {
 692   VP9_COMMON *cm = &cpi->common;
 693   vpx_roi_map_t *roi = &cpi->roi;
 694   const int range = 63;
 695   const int ref_frame_range = 3;  // Alt-ref
 696   const int skip_range = 1;
 697   const int frame_rows = cpi->common.mi_rows;
 698   const int frame_cols = cpi->common.mi_cols;
 699
 700   // Check number of rows and columns match
 701   if (frame_rows != (int)rows || frame_cols != (int)cols) {
 702     return -1;
 703   }
 704
 705   if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
 706       !check_seg_range(ref_frame, ref_frame_range) ||
 707       !check_seg_range(skip, skip_range))
 708     return -1;
 709
 710   // Also disable segmentation if no deltas are specified.
 711   if (!map ||
 712       (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
 713          delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
 714          delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
 715          delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
 716          skip[5] | skip[6] | skip[7]) &&
 717        (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
 718         ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
 719         ref_frame[6] == -1 && ref_frame[7] == -1))) {
 720     vp9_disable_segmentation(&cm->seg);
 721     cpi->roi.enabled = 0;
 722     return 0;
 723   }
 724
 725   if (roi->roi_map) {
 726     vpx_free(roi->roi_map);
 727     roi->roi_map = NULL;
 728   }
 729   CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
 730
 731   // Copy to ROI structure in the compressor.
 732   memcpy(roi->roi_map, map, rows * cols);
 733   memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
 734   memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
 735   memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
 736   memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
 737   roi->enabled = 1;
 738   roi->rows = rows;
 739   roi->cols = cols;
 740
 741   return 0;
 742 }
 743
 744 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 745                        int cols) {
 746   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
 747     unsigned char *const active_map_8x8 = cpi->active_map.map;
 748     const int mi_rows = cpi->common.mi_rows;
 749     const int mi_cols = cpi->common.mi_cols;
 750     cpi->active_map.update = 1;
 751     if (new_map_16x16) {
 752       int r, c;
 753       for (r = 0; r < mi_rows; ++r) {
 754         for (c = 0; c < mi_cols; ++c) {
 755           active_map_8x8[r * mi_cols + c] =
 756               new_map_16x16[(r >> 1) * cols + (c >> 1)]
 757                   ? AM_SEGMENT_ID_ACTIVE
 758                   : AM_SEGMENT_ID_INACTIVE;
 759         }
 760       }
 761       cpi->active_map.enabled = 1;
 762     } else {
 763       cpi->active_map.enabled = 0;
 764     }
 765     return 0;
 766   } else {
 767     return -1;
 768   }
 769 }
 770
 771 int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 772                        int cols) {
 773   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
 774       new_map_16x16) {
 775     unsigned char *const seg_map_8x8 = cpi->segmentation_map;
 776     const int mi_rows = cpi->common.mi_rows;
 777     const int mi_cols = cpi->common.mi_cols;
 778     memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
 779     if (cpi->active_map.enabled) {
 780       int r, c;
 781       for (r = 0; r < mi_rows; ++r) {
 782         for (c = 0; c < mi_cols; ++c) {
 783           // Cyclic refresh segments are considered active despite not having
 784           // AM_SEGMENT_ID_ACTIVE
 785           new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
 786               seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
 787         }
 788       }
 789     }
 790     return 0;
 791   } else {
 792     return -1;
 793   }
 794 }
 795
 796 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
 797   MACROBLOCK *const mb = &cpi->td.mb;
 798   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
 799   if (cpi->common.allow_high_precision_mv) {
 800     mb->mvcost = mb->nmvcost_hp;
 801     mb->mvsadcost = mb->nmvsadcost_hp;
 802   } else {
 803     mb->mvcost = mb->nmvcost;
 804     mb->mvsadcost = mb->nmvsadcost;
 805   }
 806 }
 807
 808 static void setup_frame(VP9_COMP *cpi) {
 809   VP9_COMMON *const cm = &cpi->common;
 810   // Set up entropy context depending on frame type. The decoder mandates
 811   // the use of the default context, index 0, for keyframes and inter
 812   // frames where the error_resilient_mode or intra_only flag is set. For
 813   // other inter-frames the encoder currently uses only two contexts;
 814   // context 1 for ALTREF frames and context 0 for the others.
 815   if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
 816     vp9_setup_past_independence(cm);
 817   } else {
 818     if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
 819   }
 820
 821   // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF
 822   // case. Need some further investigation on if we could apply this to single
 823   // layer ARF case as well.
 824   if (cpi->multi_layer_arf && !cpi->use_svc) {
 825     GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 826     const int gf_group_index = gf_group->index;
 827     const int boost_frame =
 828         !cpi->rc.is_src_frame_alt_ref &&
 829         (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
 830
 831     // frame_context_idx           Frame Type
 832     //        0              Intra only frame, base layer ARF
 833     //        1              ARFs with layer depth = 2,3
 834     //        2              ARFs with layer depth > 3
 835     //        3              Non-boosted frames
 836     if (frame_is_intra_only(cm)) {
 837       cm->frame_context_idx = 0;
 838     } else if (boost_frame) {
 839       if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
 840         cm->frame_context_idx = 0;
 841       else if (gf_group->layer_depth[gf_group_index] <= 3)
 842         cm->frame_context_idx = 1;
 843       else
 844         cm->frame_context_idx = 2;
 845     } else {
 846       cm->frame_context_idx = 3;
 847     }
 848   }
 849
 850   if (cm->frame_type == KEY_FRAME) {
 851     cpi->refresh_golden_frame = 1;
 852     cpi->refresh_alt_ref_frame = 1;
 853     vp9_zero(cpi->interp_filter_selected);
 854   } else {
 855     *cm->fc = cm->frame_contexts[cm->frame_context_idx];
 856     vp9_zero(cpi->interp_filter_selected[0]);
 857   }
 858 }
 859
 860 static void vp9_enc_setup_mi(VP9_COMMON *cm) {
 861   int i;
 862   cm->mi = cm->mip + cm->mi_stride + 1;
 863   memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
 864   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 865   // Clear top border row
 866   memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
 867   // Clear left border column
 868   for (i = 1; i < cm->mi_rows + 1; ++i)
 869     memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
 870
 871   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 872   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 873
 874   memset(cm->mi_grid_base, 0,
 875          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 876 }
 877
 878 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
 879   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
 880   if (!cm->mip) return 1;
 881   cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
 882   if (!cm->prev_mip) return 1;
 883   cm->mi_alloc_size = mi_size;
 884
 885   cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 886   if (!cm->mi_grid_base) return 1;
 887   cm->prev_mi_grid_base =
 888       (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 889   if (!cm->prev_mi_grid_base) return 1;
 890
 891   return 0;
 892 }
 893
 894 static void vp9_enc_free_mi(VP9_COMMON *cm) {
 895   vpx_free(cm->mip);
 896   cm->mip = NULL;
 897   vpx_free(cm->prev_mip);
 898   cm->prev_mip = NULL;
 899   vpx_free(cm->mi_grid_base);
 900   cm->mi_grid_base = NULL;
 901   vpx_free(cm->prev_mi_grid_base);
 902   cm->prev_mi_grid_base = NULL;
 903   cm->mi_alloc_size = 0;
 904 }
 905
 906 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
 907   // Current mip will be the prev_mip for the next frame.
 908   MODE_INFO **temp_base = cm->prev_mi_grid_base;
 909   MODE_INFO *temp = cm->prev_mip;
 910
 911   // Skip update prev_mi frame in show_existing_frame mode.
 912   if (cm->show_existing_frame) return;
 913
 914   cm->prev_mip = cm->mip;
 915   cm->mip = temp;
 916
 917   // Update the upper left visible macroblock ptrs.
 918   cm->mi = cm->mip + cm->mi_stride + 1;
 919   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 920
 921   cm->prev_mi_grid_base = cm->mi_grid_base;
 922   cm->mi_grid_base = temp_base;
 923   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 924   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 925 }
 926
 927 void vp9_initialize_enc(void) {
 928   static volatile int init_done = 0;
 929
 930   if (!init_done) {
 931     vp9_rtcd();
 932     vpx_dsp_rtcd();
 933     vpx_scale_rtcd();
 934     vp9_init_intra_predictors();
 935     vp9_init_me_luts();
 936     vp9_rc_init_minq_luts();
 937     vp9_entropy_mv_init();
 938 #if !CONFIG_REALTIME_ONLY
 939     vp9_temporal_filter_init();
 940 #endif
 941     init_done = 1;
 942   }
 943 }
 944
 945 static void dealloc_compressor_data(VP9_COMP *cpi) {
 946   VP9_COMMON *const cm = &cpi->common;
 947   int i;
 948
 949   vpx_free(cpi->mbmi_ext_base);
 950   cpi->mbmi_ext_base = NULL;
 951
 952   vpx_free(cpi->tile_data);
 953   cpi->tile_data = NULL;
 954
 955   vpx_free(cpi->segmentation_map);
 956   cpi->segmentation_map = NULL;
 957   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
 958   cpi->coding_context.last_frame_seg_map_copy = NULL;
 959
 960   vpx_free(cpi->nmvcosts[0]);
 961   vpx_free(cpi->nmvcosts[1]);
 962   cpi->nmvcosts[0] = NULL;
 963   cpi->nmvcosts[1] = NULL;
 964
 965   vpx_free(cpi->nmvcosts_hp[0]);
 966   vpx_free(cpi->nmvcosts_hp[1]);
 967   cpi->nmvcosts_hp[0] = NULL;
 968   cpi->nmvcosts_hp[1] = NULL;
 969
 970   vpx_free(cpi->nmvsadcosts[0]);
 971   vpx_free(cpi->nmvsadcosts[1]);
 972   cpi->nmvsadcosts[0] = NULL;
 973   cpi->nmvsadcosts[1] = NULL;
 974
 975   vpx_free(cpi->nmvsadcosts_hp[0]);
 976   vpx_free(cpi->nmvsadcosts_hp[1]);
 977   cpi->nmvsadcosts_hp[0] = NULL;
 978   cpi->nmvsadcosts_hp[1] = NULL;
 979
 980   vpx_free(cpi->skin_map);
 981   cpi->skin_map = NULL;
 982
 983   vpx_free(cpi->prev_partition);
 984   cpi->prev_partition = NULL;
 985
 986   vpx_free(cpi->svc.prev_partition_svc);
 987   cpi->svc.prev_partition_svc = NULL;
 988
 989   vpx_free(cpi->prev_segment_id);
 990   cpi->prev_segment_id = NULL;
 991
 992   vpx_free(cpi->prev_variance_low);
 993   cpi->prev_variance_low = NULL;
 994
 995   vpx_free(cpi->copied_frame_cnt);
 996   cpi->copied_frame_cnt = NULL;
 997
 998   vpx_free(cpi->content_state_sb_fd);
 999   cpi->content_state_sb_fd = NULL;
1000
1001   vpx_free(cpi->count_arf_frame_usage);
1002   cpi->count_arf_frame_usage = NULL;
1003   vpx_free(cpi->count_lastgolden_frame_usage);
1004   cpi->count_lastgolden_frame_usage = NULL;
1005
1006   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1007   cpi->cyclic_refresh = NULL;
1008
1009   vpx_free(cpi->active_map.map);
1010   cpi->active_map.map = NULL;
1011
1012   vpx_free(cpi->roi.roi_map);
1013   cpi->roi.roi_map = NULL;
1014
1015   vpx_free(cpi->consec_zero_mv);
1016   cpi->consec_zero_mv = NULL;
1017
1018   vpx_free(cpi->mb_wiener_variance);
1019   cpi->mb_wiener_variance = NULL;
1020
1021   vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
1022   cpi->mi_ssim_rdmult_scaling_factors = NULL;
1023
1024 #if CONFIG_RATE_CTRL
1025   free_partition_info(cpi);
1026   free_motion_vector_info(cpi);
1027   free_fp_motion_vector_info(cpi);
1028 #endif
1029
1030   vp9_free_ref_frame_buffers(cm->buffer_pool);
1031 #if CONFIG_VP9_POSTPROC
1032   vp9_free_postproc_buffers(cm);
1033 #endif
1034   vp9_free_context_buffers(cm);
1035
1036   vpx_free_frame_buffer(&cpi->last_frame_uf);
1037   vpx_free_frame_buffer(&cpi->scaled_source);
1038   vpx_free_frame_buffer(&cpi->scaled_last_source);
1039   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
1040 #ifdef ENABLE_KF_DENOISE
1041   vpx_free_frame_buffer(&cpi->raw_unscaled_source);
1042   vpx_free_frame_buffer(&cpi->raw_scaled_source);
1043 #endif
1044
1045   vp9_lookahead_destroy(cpi->lookahead);
1046
1047   vpx_free(cpi->tile_tok[0][0]);
1048   cpi->tile_tok[0][0] = 0;
1049
1050   vpx_free(cpi->tplist[0][0]);
1051   cpi->tplist[0][0] = NULL;
1052
1053   vp9_free_pc_tree(&cpi->td);
1054
1055   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
1056     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
1057     vpx_free(lc->rc_twopass_stats_in.buf);
1058     lc->rc_twopass_stats_in.buf = NULL;
1059     lc->rc_twopass_stats_in.sz = 0;
1060   }
1061
1062   if (cpi->source_diff_var != NULL) {
1063     vpx_free(cpi->source_diff_var);
1064     cpi->source_diff_var = NULL;
1065   }
1066
1067   for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
1068     vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
1069   }
1070   memset(&cpi->svc.scaled_frames[0], 0,
1071          MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
1072
1073   vpx_free_frame_buffer(&cpi->svc.scaled_temp);
1074   memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
1075
1076   vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
1077   memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
1078
1079   vp9_free_svc_cyclic_refresh(cpi);
1080 }
1081
1082 static void save_coding_context(VP9_COMP *cpi) {
1083   CODING_CONTEXT *const cc = &cpi->coding_context;
1084   VP9_COMMON *cm = &cpi->common;
1085
1086   // Stores a snapshot of key state variables which can subsequently be
1087   // restored with a call to vp9_restore_coding_context. These functions are
1088   // intended for use in a re-code loop in vp9_compress_frame where the
1089   // quantizer value is adjusted between loop iterations.
1090   vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
1091
1092   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
1093          MV_VALS * sizeof(*cpi->nmvcosts[0]));
1094   memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
1095          MV_VALS * sizeof(*cpi->nmvcosts[1]));
1096   memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
1097          MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
1098   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
1099          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
1100
1101   vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
1102
1103   memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
1104          (cm->mi_rows * cm->mi_cols));
1105
1106   vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
1107   vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
1108
1109   cc->fc = *cm->fc;
1110 }
1111
1112 static void restore_coding_context(VP9_COMP *cpi) {
1113   CODING_CONTEXT *const cc = &cpi->coding_context;
1114   VP9_COMMON *cm = &cpi->common;
1115
1116   // Restore key state variables to the snapshot state stored in the
1117   // previous call to vp9_save_coding_context.
1118   vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
1119
1120   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
1121   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
1122   memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
1123          MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
1124   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
1125          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
1126
1127   vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
1128
1129   memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
1130          (cm->mi_rows * cm->mi_cols));
1131
1132   vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
1133   vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
1134
1135   *cm->fc = cc->fc;
1136 }
1137
1138 #if !CONFIG_REALTIME_ONLY
// Configures segmentation state (enable flags, map/data update flags and the
// per-segment features of segments 0 and 1) according to the kind of frame
// being coded:
//   - key frame: segmentation is disabled and all features cleared;
//   - alt-ref refresh: segmentation is reset, vp9_update_mbgraph_stats() may
//     re-enable it, and if so segment 1 gets Q and loop-filter deltas;
//   - any other frame with segmentation enabled: features depend on
//     rc->frames_since_golden, rc->source_alt_ref_active and
//     rc->is_src_frame_alt_ref.
// Frames that match none of these cases are left untouched.
static void configure_static_seg_features(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const RATE_CONTROL *const rc = &cpi->rc;
  struct segmentation *const seg = &cm->seg;

  // Average Q above this fixed threshold counts as "high Q" below.
  int high_q = (int)(rc->avg_q > 48.0);
  int qi_delta;

  // Disable and clear down for KF
  if (cm->frame_type == KEY_FRAME) {
    // Clear down the global segmentation map
    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
    seg->update_map = 0;
    seg->update_data = 0;
    cpi->static_mb_pct = 0;

    // Disable segmentation
    vp9_disable_segmentation(seg);

    // Clear down the segment features.
    vp9_clearall_segfeatures(seg);
  } else if (cpi->refresh_alt_ref_frame) {
    // If this is an alt ref frame
    // Clear down the global segmentation map
    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
    seg->update_map = 0;
    seg->update_data = 0;
    cpi->static_mb_pct = 0;

    // Disable segmentation and individual segment features by default
    vp9_disable_segmentation(seg);
    vp9_clearall_segfeatures(seg);

    // Scan frames from current to arf frame.
    // This function re-enables segmentation if appropriate.
    vp9_update_mbgraph_stats(cpi);

    // If segmentation was enabled set those features needed for the
    // arf itself.
    if (seg->enabled) {
      seg->update_map = 1;
      seg->update_data = 1;

      // Q delta corresponding to a target of 0.875 * avg_q, lowered by a
      // further 2 index steps.
      qi_delta =
          vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
      vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
      vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);

      vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
      vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);

      // Where relevant assume segment data is delta data
      seg->abs_delta = SEGMENT_DELTADATA;
    }
  } else if (seg->enabled) {
    // All other frames if segmentation has been enabled

    // First normal frame in a valid gf or alt ref group
    if (rc->frames_since_golden == 0) {
      // Set up segment features for normal frames in an arf group
      if (rc->source_alt_ref_active) {
        seg->update_map = 0;
        seg->update_data = 1;
        seg->abs_delta = SEGMENT_DELTADATA;

        // Q delta corresponding to a target of 1.125 * avg_q, raised by a
        // further 2 index steps.
        qi_delta =
            vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth);
        vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
        vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);

        vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
        vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);

        // Segment coding disabled for compred testing
        // At high Q, or when static_mb_pct is 100, segment 1 is additionally
        // given the ALTREF_FRAME reference and skip features.
        if (high_q || (cpi->static_mb_pct == 100)) {
          vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
          vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
          vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
        }
      } else {
        // Disable segmentation and clear down features if alt ref
        // is not active for this group

        vp9_disable_segmentation(seg);

        memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);

        seg->update_map = 0;
        seg->update_data = 0;

        vp9_clearall_segfeatures(seg);
      }
    } else if (rc->is_src_frame_alt_ref) {
      // Special case where we are coding over the top of a previous
      // alt ref frame.
      // Segment coding disabled for compred testing

      // Enable ref frame features for segment 0 as well
      vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
      vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);

      // All mbs should use ALTREF_FRAME
      vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
      vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
      vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
      vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);

      // Skip all MBs if high Q (0,0 mv and skip coeffs)
      if (high_q) {
        vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
        vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
      }
      // Enable data update
      seg->update_data = 1;
    } else {
      // All other frames.

      // No updates.. leave things as they are.
      seg->update_map = 0;
      seg->update_data = 0;
    }
  }
}
1262 #endif  // !CONFIG_REALTIME_ONLY
1263
1264 static void update_reference_segmentation_map(VP9_COMP *cpi) {
1265   VP9_COMMON *const cm = &cpi->common;
1266   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
1267   uint8_t *cache_ptr = cm->last_frame_seg_map;
1268   int row, col;
1269
1270   for (row = 0; row < cm->mi_rows; row++) {
1271     MODE_INFO **mi_8x8 = mi_8x8_ptr;
1272     uint8_t *cache = cache_ptr;
1273     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
1274       cache[0] = mi_8x8[0]->segment_id;
1275     mi_8x8_ptr += cm->mi_stride;
1276     cache_ptr += cm->mi_cols;
1277   }
1278 }
1279
1280 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
1281   VP9_COMMON *cm = &cpi->common;
1282   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1283
1284   if (!cpi->lookahead)
1285     cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
1286                                         cm->subsampling_x, cm->subsampling_y,
1287 #if CONFIG_VP9_HIGHBITDEPTH
1288                                         cm->use_highbitdepth,
1289 #endif
1290                                         oxcf->lag_in_frames);
1291   if (!cpi->lookahead)
1292     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1293                        "Failed to allocate lag buffers");
1294
1295   // TODO(agrange) Check if ARF is enabled and skip allocation if not.
1296   if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
1297                                cm->subsampling_x, cm->subsampling_y,
1298 #if CONFIG_VP9_HIGHBITDEPTH
1299                                cm->use_highbitdepth,
1300 #endif
1301                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1302                                NULL, NULL, NULL))
1303     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1304                        "Failed to allocate altref buffer");
1305 }
1306
1307 static void alloc_util_frame_buffers(VP9_COMP *cpi) {
1308   VP9_COMMON *const cm = &cpi->common;
1309   if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
1310                                cm->subsampling_x, cm->subsampling_y,
1311 #if CONFIG_VP9_HIGHBITDEPTH
1312                                cm->use_highbitdepth,
1313 #endif
1314                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1315                                NULL, NULL, NULL))
1316     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1317                        "Failed to allocate last frame buffer");
1318
1319   if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
1320                                cm->subsampling_x, cm->subsampling_y,
1321 #if CONFIG_VP9_HIGHBITDEPTH
1322                                cm->use_highbitdepth,
1323 #endif
1324                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1325                                NULL, NULL, NULL))
1326     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1327                        "Failed to allocate scaled source buffer");
1328
1329   // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
1330   // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
1331   // target of 1/4x1/4. number_spatial_layers must be greater than 2.
1332   if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
1333       cpi->svc.number_spatial_layers > 2) {
1334     cpi->svc.scaled_temp_is_alloc = 1;
1335     if (vpx_realloc_frame_buffer(
1336             &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
1337             cm->subsampling_x, cm->subsampling_y,
1338 #if CONFIG_VP9_HIGHBITDEPTH
1339             cm->use_highbitdepth,
1340 #endif
1341             VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
1342       vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
1343                          "Failed to allocate scaled_frame for svc ");
1344   }
1345
1346   if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
1347                                cm->subsampling_x, cm->subsampling_y,
1348 #if CONFIG_VP9_HIGHBITDEPTH
1349                                cm->use_highbitdepth,
1350 #endif
1351                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1352                                NULL, NULL, NULL))
1353     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1354                        "Failed to allocate scaled last source buffer");
1355 #ifdef ENABLE_KF_DENOISE
1356   if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height,
1357                                cm->subsampling_x, cm->subsampling_y,
1358 #if CONFIG_VP9_HIGHBITDEPTH
1359                                cm->use_highbitdepth,
1360 #endif
1361                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1362                                NULL, NULL, NULL))
1363     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1364                        "Failed to allocate unscaled raw source frame buffer");
1365
1366   if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height,
1367                                cm->subsampling_x, cm->subsampling_y,
1368 #if CONFIG_VP9_HIGHBITDEPTH
1369                                cm->use_highbitdepth,
1370 #endif
1371                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1372                                NULL, NULL, NULL))
1373     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1374                        "Failed to allocate scaled raw source frame buffer");
1375 #endif
1376 }
1377
1378 static int alloc_context_buffers_ext(VP9_COMP *cpi) {
1379   VP9_COMMON *cm = &cpi->common;
1380   int mi_size = cm->mi_cols * cm->mi_rows;
1381
1382   cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
1383   if (!cpi->mbmi_ext_base) return 1;
1384
1385   return 0;
1386 }
1387
1388 static void alloc_compressor_data(VP9_COMP *cpi) {
1389   VP9_COMMON *cm = &cpi->common;
1390   int sb_rows;
1391
1392   vp9_alloc_context_buffers(cm, cm->width, cm->height);
1393
1394   alloc_context_buffers_ext(cpi);
1395
1396   vpx_free(cpi->tile_tok[0][0]);
1397
1398   {
1399     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
1400     CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
1401                     vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
1402   }
1403
1404   sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
1405   vpx_free(cpi->tplist[0][0]);
1406   CHECK_MEM_ERROR(
1407       cm, cpi->tplist[0][0],
1408       vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
1409
1410   vp9_setup_pc_tree(&cpi->common, &cpi->td);
1411 }
1412
1413 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
1414   cpi->framerate = framerate < 0.1 ? 30 : framerate;
1415   vp9_rc_update_framerate(cpi);
1416 }
1417
1418 static void set_tile_limits(VP9_COMP *cpi) {
1419   VP9_COMMON *const cm = &cpi->common;
1420
1421   int min_log2_tile_cols, max_log2_tile_cols;
1422   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
1423
1424   cm->log2_tile_cols =
1425       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
1426   cm->log2_tile_rows = cpi->oxcf.tile_rows;
1427
1428   if (cpi->oxcf.target_level == LEVEL_AUTO) {
1429     const int level_tile_cols =
1430         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
1431     if (cm->log2_tile_cols > level_tile_cols) {
1432       cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
1433     }
1434   }
1435 }
1436
1437 static void update_frame_size(VP9_COMP *cpi) {
1438   VP9_COMMON *const cm = &cpi->common;
1439   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
1440
1441   vp9_set_mb_mi(cm, cm->width, cm->height);
1442   vp9_init_context_buffers(cm);
1443   vp9_init_macroblockd(cm, xd, NULL);
1444   cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
1445   memset(cpi->mbmi_ext_base, 0,
1446          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
1447
1448   set_tile_limits(cpi);
1449 }
1450
1451 static void init_buffer_indices(VP9_COMP *cpi) {
1452   int ref_frame;
1453
1454   for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1455     cpi->ref_fb_idx[ref_frame] = ref_frame;
1456
1457   cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
1458   cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
1459   cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
1460 }
1461
1462 static void init_level_constraint(LevelConstraint *lc) {
1463   lc->level_index = -1;
1464   lc->max_cpb_size = INT_MAX;
1465   lc->max_frame_size = INT_MAX;
1466   lc->fail_flag = 0;
1467 }
1468
1469 static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
1470   vpx_clear_system_state();
1471   ls->level_index = level_index;
1472   if (level_index >= 0) {
1473     ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000;
1474   }
1475 }
1476
1477 static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1478   VP9_COMMON *const cm = &cpi->common;
1479
1480   cpi->oxcf = *oxcf;
1481   cpi->framerate = oxcf->init_framerate;
1482   cm->profile = oxcf->profile;
1483   cm->bit_depth = oxcf->bit_depth;
1484 #if CONFIG_VP9_HIGHBITDEPTH
1485   cm->use_highbitdepth = oxcf->use_highbitdepth;
1486 #endif
1487   cm->color_space = oxcf->color_space;
1488   cm->color_range = oxcf->color_range;
1489
1490   cpi->target_level = oxcf->target_level;
1491   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1492   set_level_constraint(&cpi->level_constraint,
1493                        get_level_index(cpi->target_level));
1494
1495   cm->width = oxcf->width;
1496   cm->height = oxcf->height;
1497   alloc_compressor_data(cpi);
1498
1499   cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
1500
1501   // Single thread case: use counts in common.
1502   cpi->td.counts = &cm->counts;
1503
1504   // Spatial scalability.
1505   cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
1506   // Temporal scalability.
1507   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
1508
1509   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
1510       ((cpi->svc.number_temporal_layers > 1 ||
1511         cpi->svc.number_spatial_layers > 1) &&
1512        cpi->oxcf.pass != 1)) {
1513     vp9_init_layer_context(cpi);
1514   }
1515
1516   // change includes all joint functionality
1517   vp9_change_config(cpi, oxcf);
1518
1519   cpi->static_mb_pct = 0;
1520   cpi->ref_frame_flags = 0;
1521
1522   init_buffer_indices(cpi);
1523
1524   vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
1525 }
1526
1527 void vp9_check_reset_rc_flag(VP9_COMP *cpi) {
1528   RATE_CONTROL *rc = &cpi->rc;
1529
1530   if (cpi->common.current_video_frame >
1531       (unsigned int)cpi->svc.number_spatial_layers) {
1532     if (cpi->use_svc) {
1533       vp9_svc_check_reset_layer_rc_flag(cpi);
1534     } else {
1535       if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
1536           rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
1537         rc->rc_1_frame = 0;
1538         rc->rc_2_frame = 0;
1539         rc->bits_off_target = rc->optimal_buffer_level;
1540         rc->buffer_level = rc->optimal_buffer_level;
1541       }
1542     }
1543   }
1544 }
1545
1546 void vp9_set_rc_buffer_sizes(VP9_COMP *cpi) {
1547   RATE_CONTROL *rc = &cpi->rc;
1548   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1549
1550   const int64_t bandwidth = oxcf->target_bandwidth;
1551   const int64_t starting = oxcf->starting_buffer_level_ms;
1552   const int64_t optimal = oxcf->optimal_buffer_level_ms;
1553   const int64_t maximum = oxcf->maximum_buffer_size_ms;
1554
1555   rc->starting_buffer_level = starting * bandwidth / 1000;
1556   rc->optimal_buffer_level =
1557       (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
1558   rc->maximum_buffer_size =
1559       (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
1560
1561   // Under a configuration change, where maximum_buffer_size may change,
1562   // keep buffer level clipped to the maximum allowed buffer size.
1563   rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
1564   rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
1565 }
1566
1567 #if CONFIG_VP9_HIGHBITDEPTH
// Fills the cpi->fn_ptr[BT] entry with the given function pointers (sdf,
// sdaf, vf, svf, svaf, sdx4df) and sets its sdx8f member to NULL.
// The expansion is a plain sequence of statements that refers to a variable
// named `cpi` at the point of use and is not wrapped in do { } while (0), so
// it must not be used as the unbraced body of an if/else or loop.
// TODO(angiebird): make sdx8f available for highbitdepth if needed
#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
  cpi->fn_ptr[BT].sdf = SDF;                             \
  cpi->fn_ptr[BT].sdaf = SDAF;                           \
  cpi->fn_ptr[BT].vf = VF;                               \
  cpi->fn_ptr[BT].svf = SVF;                             \
  cpi->fn_ptr[BT].svaf = SVAF;                           \
  cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
  cpi->fn_ptr[BT].sdx8f = NULL;
1577
// Defines three static wrappers around the SAD function `fnname`, named
// fnname_bits8, fnname_bits10 and fnname_bits12. All forward their arguments
// unchanged; the _bits8 variant returns the result as is, while the _bits10
// and _bits12 variants shift it right by 2 and 4 bits respectively.
// NOTE(review): the shifts presumably scale 10/12-bit results back to an
// 8-bit range — confirm against the call sites.
#define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
  static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
                                     int source_stride,                        \
                                     const uint8_t *ref_ptr, int ref_stride) { \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride);                \
  }                                                                            \
  static unsigned int fnname##_bits10(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride) {                                                        \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;           \
  }                                                                            \
  static unsigned int fnname##_bits12(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride) {                                                        \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;           \
  }
1594
// Same scheme as the plain SAD wrappers, for functions that take an extra
// second_pred argument: defines static fnname_bits8, fnname_bits10 and
// fnname_bits12, which forward all arguments to `fnname` and return the
// result unchanged, shifted right by 2, and shifted right by 4 respectively.
#define MAKE_BFP_SADAVG_WRAPPER(fnname)                                        \
  static unsigned int fnname##_bits8(                                          \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride, const uint8_t *second_pred) {                            \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred);   \
  }                                                                            \
  static unsigned int fnname##_bits10(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride, const uint8_t *second_pred) {                            \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
           2;                                                                  \
  }                                                                            \
  static unsigned int fnname##_bits12(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride, const uint8_t *second_pred) {                            \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
           4;                                                                  \
  }
1613
// Defines three static adapters around the 4-reference SAD function `fnname`,
// one per supported bit depth. `fnname` fills the caller's result array; the
// adapters then rescale the first four entries in place:
//   fnname_bits8  - results left unchanged;
//   fnname_bits10 - each result shifted right by 2;
//   fnname_bits12 - each result shifted right by 4.
#define MAKE_BFP_SAD4D_WRAPPER(fnname)                                        \
  static void fnname##_bits8(const uint8_t *src_buf, int src_stride,          \
                             const uint8_t *const ref_bufs[], int ref_stride, \
                             unsigned int *sads) {                            \
    fnname(src_buf, src_stride, ref_bufs, ref_stride, sads);                  \
  }                                                                           \
  static void fnname##_bits10(const uint8_t *src_buf, int src_stride,         \
                              const uint8_t *const ref_bufs[],                \
                              int ref_stride, unsigned int *sads) {           \
    int k;                                                                    \
    fnname(src_buf, src_stride, ref_bufs, ref_stride, sads);                  \
    for (k = 0; k < 4; ++k) sads[k] >>= 2;                                    \
  }                                                                           \
  static void fnname##_bits12(const uint8_t *src_buf, int src_stride,         \
                              const uint8_t *const ref_bufs[],                \
                              int ref_stride, unsigned int *sads) {           \
    int k;                                                                    \
    fnname(src_buf, src_stride, ref_bufs, ref_stride, sads);                  \
    for (k = 0; k < 4; ++k) sads[k] >>= 4;                                    \
  }
1634
1635 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
1636 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
1637 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
1638 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
1639 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
1640 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
1641 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
1642 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
1643 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
1644 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
1645 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
1646 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
1647 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
1648 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
1649 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
1650 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
1651 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
1652 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
1653 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
1654 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
1655 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
1656 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
1657 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
1658 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
1659 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
1660 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
1661 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
1662 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
1663 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
1664 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
1665 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
1666 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
1667 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
1668 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
1669 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
1670 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
1671 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
1672 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
1673 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
1674
1675 static void highbd_set_var_fns(VP9_COMP *const cpi) {
1676   VP9_COMMON *const cm = &cpi->common;
1677   if (cm->use_highbitdepth) {
1678     switch (cm->bit_depth) {
1679       case VPX_BITS_8:
1680         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
1681                    vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
1682                    vpx_highbd_8_sub_pixel_variance32x16,
1683                    vpx_highbd_8_sub_pixel_avg_variance32x16,
1684                    vpx_highbd_sad32x16x4d_bits8)
1685
1686         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
1687                    vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
1688                    vpx_highbd_8_sub_pixel_variance16x32,
1689                    vpx_highbd_8_sub_pixel_avg_variance16x32,
1690                    vpx_highbd_sad16x32x4d_bits8)
1691
1692         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
1693                    vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
1694                    vpx_highbd_8_sub_pixel_variance64x32,
1695                    vpx_highbd_8_sub_pixel_avg_variance64x32,
1696                    vpx_highbd_sad64x32x4d_bits8)
1697
1698         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
1699                    vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
1700                    vpx_highbd_8_sub_pixel_variance32x64,
1701                    vpx_highbd_8_sub_pixel_avg_variance32x64,
1702                    vpx_highbd_sad32x64x4d_bits8)
1703
1704         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
1705                    vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
1706                    vpx_highbd_8_sub_pixel_variance32x32,
1707                    vpx_highbd_8_sub_pixel_avg_variance32x32,
1708                    vpx_highbd_sad32x32x4d_bits8)
1709
1710         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
1711                    vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
1712                    vpx_highbd_8_sub_pixel_variance64x64,
1713                    vpx_highbd_8_sub_pixel_avg_variance64x64,
1714                    vpx_highbd_sad64x64x4d_bits8)
1715
1716         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
1717                    vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
1718                    vpx_highbd_8_sub_pixel_variance16x16,
1719                    vpx_highbd_8_sub_pixel_avg_variance16x16,
1720                    vpx_highbd_sad16x16x4d_bits8)
1721
1722         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
1723                    vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
1724                    vpx_highbd_8_sub_pixel_variance16x8,
1725                    vpx_highbd_8_sub_pixel_avg_variance16x8,
1726                    vpx_highbd_sad16x8x4d_bits8)
1727
1728         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
1729                    vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
1730                    vpx_highbd_8_sub_pixel_variance8x16,
1731                    vpx_highbd_8_sub_pixel_avg_variance8x16,
1732                    vpx_highbd_sad8x16x4d_bits8)
1733
1734         HIGHBD_BFP(
1735             BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
1736             vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
1737             vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
1738
1739         HIGHBD_BFP(
1740             BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
1741             vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
1742             vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
1743
1744         HIGHBD_BFP(
1745             BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
1746             vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
1747             vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
1748
1749         HIGHBD_BFP(
1750             BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
1751             vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
1752             vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
1753         break;
1754
1755       case VPX_BITS_10:
1756         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
1757                    vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
1758                    vpx_highbd_10_sub_pixel_variance32x16,
1759                    vpx_highbd_10_sub_pixel_avg_variance32x16,
1760                    vpx_highbd_sad32x16x4d_bits10)
1761
1762         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
1763                    vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
1764                    vpx_highbd_10_sub_pixel_variance16x32,
1765                    vpx_highbd_10_sub_pixel_avg_variance16x32,
1766                    vpx_highbd_sad16x32x4d_bits10)
1767
1768         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
1769                    vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
1770                    vpx_highbd_10_sub_pixel_variance64x32,
1771                    vpx_highbd_10_sub_pixel_avg_variance64x32,
1772                    vpx_highbd_sad64x32x4d_bits10)
1773
1774         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
1775                    vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
1776                    vpx_highbd_10_sub_pixel_variance32x64,
1777                    vpx_highbd_10_sub_pixel_avg_variance32x64,
1778                    vpx_highbd_sad32x64x4d_bits10)
1779
1780         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
1781                    vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
1782                    vpx_highbd_10_sub_pixel_variance32x32,
1783                    vpx_highbd_10_sub_pixel_avg_variance32x32,
1784                    vpx_highbd_sad32x32x4d_bits10)
1785
1786         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
1787                    vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
1788                    vpx_highbd_10_sub_pixel_variance64x64,
1789                    vpx_highbd_10_sub_pixel_avg_variance64x64,
1790                    vpx_highbd_sad64x64x4d_bits10)
1791
1792         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
1793                    vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
1794                    vpx_highbd_10_sub_pixel_variance16x16,
1795                    vpx_highbd_10_sub_pixel_avg_variance16x16,
1796                    vpx_highbd_sad16x16x4d_bits10)
1797
1798         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
1799                    vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
1800                    vpx_highbd_10_sub_pixel_variance16x8,
1801                    vpx_highbd_10_sub_pixel_avg_variance16x8,
1802                    vpx_highbd_sad16x8x4d_bits10)
1803
1804         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
1805                    vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
1806                    vpx_highbd_10_sub_pixel_variance8x16,
1807                    vpx_highbd_10_sub_pixel_avg_variance8x16,
1808                    vpx_highbd_sad8x16x4d_bits10)
1809
1810         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
1811                    vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
1812                    vpx_highbd_10_sub_pixel_variance8x8,
1813                    vpx_highbd_10_sub_pixel_avg_variance8x8,
1814                    vpx_highbd_sad8x8x4d_bits10)
1815
1816         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
1817                    vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
1818                    vpx_highbd_10_sub_pixel_variance8x4,
1819                    vpx_highbd_10_sub_pixel_avg_variance8x4,
1820                    vpx_highbd_sad8x4x4d_bits10)
1821
1822         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
1823                    vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
1824                    vpx_highbd_10_sub_pixel_variance4x8,
1825                    vpx_highbd_10_sub_pixel_avg_variance4x8,
1826                    vpx_highbd_sad4x8x4d_bits10)
1827
1828         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
1829                    vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
1830                    vpx_highbd_10_sub_pixel_variance4x4,
1831                    vpx_highbd_10_sub_pixel_avg_variance4x4,
1832                    vpx_highbd_sad4x4x4d_bits10)
1833         break;
1834
1835       default:
1836         assert(cm->bit_depth == VPX_BITS_12);
1837         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
1838                    vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
1839                    vpx_highbd_12_sub_pixel_variance32x16,
1840                    vpx_highbd_12_sub_pixel_avg_variance32x16,
1841                    vpx_highbd_sad32x16x4d_bits12)
1842
1843         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
1844                    vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
1845                    vpx_highbd_12_sub_pixel_variance16x32,
1846                    vpx_highbd_12_sub_pixel_avg_variance16x32,
1847                    vpx_highbd_sad16x32x4d_bits12)
1848
1849         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
1850                    vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
1851                    vpx_highbd_12_sub_pixel_variance64x32,
1852                    vpx_highbd_12_sub_pixel_avg_variance64x32,
1853                    vpx_highbd_sad64x32x4d_bits12)
1854
1855         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
1856                    vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
1857                    vpx_highbd_12_sub_pixel_variance32x64,
1858                    vpx_highbd_12_sub_pixel_avg_variance32x64,
1859                    vpx_highbd_sad32x64x4d_bits12)
1860
1861         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
1862                    vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
1863                    vpx_highbd_12_sub_pixel_variance32x32,
1864                    vpx_highbd_12_sub_pixel_avg_variance32x32,
1865                    vpx_highbd_sad32x32x4d_bits12)
1866
1867         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
1868                    vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
1869                    vpx_highbd_12_sub_pixel_variance64x64,
1870                    vpx_highbd_12_sub_pixel_avg_variance64x64,
1871                    vpx_highbd_sad64x64x4d_bits12)
1872
1873         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
1874                    vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
1875                    vpx_highbd_12_sub_pixel_variance16x16,
1876                    vpx_highbd_12_sub_pixel_avg_variance16x16,
1877                    vpx_highbd_sad16x16x4d_bits12)
1878
1879         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
1880                    vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
1881                    vpx_highbd_12_sub_pixel_variance16x8,
1882                    vpx_highbd_12_sub_pixel_avg_variance16x8,
1883                    vpx_highbd_sad16x8x4d_bits12)
1884
1885         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
1886                    vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
1887                    vpx_highbd_12_sub_pixel_variance8x16,
1888                    vpx_highbd_12_sub_pixel_avg_variance8x16,
1889                    vpx_highbd_sad8x16x4d_bits12)
1890
1891         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
1892                    vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
1893                    vpx_highbd_12_sub_pixel_variance8x8,
1894                    vpx_highbd_12_sub_pixel_avg_variance8x8,
1895                    vpx_highbd_sad8x8x4d_bits12)
1896
1897         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
1898                    vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
1899                    vpx_highbd_12_sub_pixel_variance8x4,
1900                    vpx_highbd_12_sub_pixel_avg_variance8x4,
1901                    vpx_highbd_sad8x4x4d_bits12)
1902
1903         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
1904                    vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
1905                    vpx_highbd_12_sub_pixel_variance4x8,
1906                    vpx_highbd_12_sub_pixel_avg_variance4x8,
1907                    vpx_highbd_sad4x8x4d_bits12)
1908
1909         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
1910                    vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
1911                    vpx_highbd_12_sub_pixel_variance4x4,
1912                    vpx_highbd_12_sub_pixel_avg_variance4x4,
1913                    vpx_highbd_sad4x4x4d_bits12)
1914         break;
1915     }
1916   }
1917 }
1918 #endif  // CONFIG_VP9_HIGHBITDEPTH
1919
1920 static void realloc_segmentation_maps(VP9_COMP *cpi) {
1921   VP9_COMMON *const cm = &cpi->common;
1922
1923   // Create the encoder segmentation map and set all entries to 0
1924   vpx_free(cpi->segmentation_map);
1925   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
1926                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1927
1928   // Create a map used for cyclic background refresh.
1929   if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1930   CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
1931                   vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
1932
1933   // Create a map used to mark inactive areas.
1934   vpx_free(cpi->active_map.map);
1935   CHECK_MEM_ERROR(cm, cpi->active_map.map,
1936                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1937
1938   // And a place holder structure is the coding context
1939   // for use if we want to save and restore it
1940   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
1941   CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
1942                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1943 }
1944
1945 static void alloc_copy_partition_data(VP9_COMP *cpi) {
1946   VP9_COMMON *const cm = &cpi->common;
1947   if (cpi->prev_partition == NULL) {
1948     CHECK_MEM_ERROR(cm, cpi->prev_partition,
1949                     (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
1950                                              sizeof(*cpi->prev_partition)));
1951   }
1952   if (cpi->prev_segment_id == NULL) {
1953     CHECK_MEM_ERROR(
1954         cm, cpi->prev_segment_id,
1955         (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1956                              sizeof(*cpi->prev_segment_id)));
1957   }
1958   if (cpi->prev_variance_low == NULL) {
1959     CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
1960                     (uint8_t *)vpx_calloc(
1961                         (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
1962                         sizeof(*cpi->prev_variance_low)));
1963   }
1964   if (cpi->copied_frame_cnt == NULL) {
1965     CHECK_MEM_ERROR(
1966         cm, cpi->copied_frame_cnt,
1967         (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1968                               sizeof(*cpi->copied_frame_cnt)));
1969   }
1970 }
1971
1972 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1973   VP9_COMMON *const cm = &cpi->common;
1974   RATE_CONTROL *const rc = &cpi->rc;
1975   int last_w = cpi->oxcf.width;
1976   int last_h = cpi->oxcf.height;
1977
1978   vp9_init_quantizer(cpi);
1979   if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
1980   cm->bit_depth = oxcf->bit_depth;
1981   cm->color_space = oxcf->color_space;
1982   cm->color_range = oxcf->color_range;
1983
1984   cpi->target_level = oxcf->target_level;
1985   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1986   set_level_constraint(&cpi->level_constraint,
1987                        get_level_index(cpi->target_level));
1988
1989   if (cm->profile <= PROFILE_1)
1990     assert(cm->bit_depth == VPX_BITS_8);
1991   else
1992     assert(cm->bit_depth > VPX_BITS_8);
1993
1994   cpi->oxcf = *oxcf;
1995 #if CONFIG_VP9_HIGHBITDEPTH
1996   cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
1997 #endif  // CONFIG_VP9_HIGHBITDEPTH
1998
1999   if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
2000     rc->baseline_gf_interval = FIXED_GF_INTERVAL;
2001   } else {
2002     rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
2003   }
2004
2005   cpi->refresh_golden_frame = 0;
2006   cpi->refresh_last_frame = 1;
2007   cm->refresh_frame_context = 1;
2008   cm->reset_frame_context = 0;
2009
2010   vp9_reset_segment_features(&cm->seg);
2011   vp9_set_high_precision_mv(cpi, 0);
2012
2013   {
2014     int i;
2015
2016     for (i = 0; i < MAX_SEGMENTS; i++)
2017       cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
2018   }
2019   cpi->encode_breakout = cpi->oxcf.encode_breakout;
2020
2021   vp9_set_rc_buffer_sizes(cpi);
2022
2023   // Set up frame rate and related parameters rate control values.
2024   vp9_new_framerate(cpi, cpi->framerate);
2025
2026   // Set absolute upper and lower quality limits
2027   rc->worst_quality = cpi->oxcf.worst_allowed_q;
2028   rc->best_quality = cpi->oxcf.best_allowed_q;
2029
2030   cm->interp_filter = cpi->sf.default_interp_filter;
2031
2032   if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
2033     cm->render_width = cpi->oxcf.render_width;
2034     cm->render_height = cpi->oxcf.render_height;
2035   } else {
2036     cm->render_width = cpi->oxcf.width;
2037     cm->render_height = cpi->oxcf.height;
2038   }
2039   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2040     cm->width = cpi->oxcf.width;
2041     cm->height = cpi->oxcf.height;
2042     cpi->external_resize = 1;
2043   }
2044
2045   if (cpi->initial_width) {
2046     int new_mi_size = 0;
2047     vp9_set_mb_mi(cm, cm->width, cm->height);
2048     new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
2049     if (cm->mi_alloc_size < new_mi_size) {
2050       vp9_free_context_buffers(cm);
2051       alloc_compressor_data(cpi);
2052       realloc_segmentation_maps(cpi);
2053       cpi->initial_width = cpi->initial_height = 0;
2054       cpi->external_resize = 0;
2055     } else if (cm->mi_alloc_size == new_mi_size &&
2056                (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
2057       vp9_alloc_loop_filter(cm);
2058     }
2059   }
2060
2061   if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width ||
2062       last_h != cpi->oxcf.height)
2063     update_frame_size(cpi);
2064
2065   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2066     memset(cpi->consec_zero_mv, 0,
2067            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
2068     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
2069       vp9_cyclic_refresh_reset_resize(cpi);
2070     rc->rc_1_frame = 0;
2071     rc->rc_2_frame = 0;
2072   }
2073
2074   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
2075       ((cpi->svc.number_temporal_layers > 1 ||
2076         cpi->svc.number_spatial_layers > 1) &&
2077        cpi->oxcf.pass != 1)) {
2078     vp9_update_layer_context_change_config(cpi,
2079                                            (int)cpi->oxcf.target_bandwidth);
2080   }
2081
2082   vp9_check_reset_rc_flag(cpi);
2083
2084   cpi->alt_ref_source = NULL;
2085   rc->is_src_frame_alt_ref = 0;
2086
2087 #if 0
2088   // Experimental RD Code
2089   cpi->frame_distortion = 0;
2090   cpi->last_frame_distortion = 0;
2091 #endif
2092
2093   set_tile_limits(cpi);
2094
2095   cpi->ext_refresh_frame_flags_pending = 0;
2096   cpi->ext_refresh_frame_context_pending = 0;
2097
2098 #if CONFIG_VP9_HIGHBITDEPTH
2099   highbd_set_var_fns(cpi);
2100 #endif
2101
2102   vp9_set_row_mt(cpi);
2103 }
2104
// Despite its name, M_LOG2_E holds ln(2) (0.6931...), so dividing a natural
// logarithm by it yields a base-2 logarithm.
#if !defined(M_LOG2_E)
#define M_LOG2_E 0.693147180559945309417
#endif
// File-local replacement for log2f(): for the rest of this file the name
// expands to log(x) divided by the float-rounded constant above, evaluated in
// double precision.
#define log2f(x) (log(x) / (float)M_LOG2_E)
2109
2110 /***********************************************************************
2111  * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts'    *
2112  ***********************************************************************
2113  * The following 2 functions ('cal_nmvjointsadcost' and                *
2114  * 'cal_nmvsadcosts') are used to calculate cost lookup tables         *
2115  * used by 'vp9_diamond_search_sad'. The C implementation of the       *
2116  * function is generic, but the AVX intrinsics optimised version       *
2117  * relies on the following properties of the computed tables:          *
2118  * For cal_nmvjointsadcost:                                            *
2119  *   - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]     *
2120  * For cal_nmvsadcosts:                                                *
2121  *   - For all i: mvsadcost[0][i] == mvsadcost[1][i]                   *
2122  *         (Equal costs for both components)                           *
2123  *   - For all i: mvsadcost[0][i] == mvsadcost[0][-i]                  *
2124  *         (Cost function is even)                                     *
2125  * If these do not hold, then the AVX optimised version of the         *
2126  * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
2127  * case you can revert to using the C function instead.                *
2128  ***********************************************************************/
2129
// Fills the 4-entry joint SAD cost table: entry 0 costs 600 and entries 1..3
// all share the cost 300.
static void cal_nmvjointsadcost(int *mvjointsadcost) {
  /*********************************************************************
   * Warning: Read the comments above before modifying this function   *
   *********************************************************************/
  int joint;

  mvjointsadcost[0] = 600;
  // Entries 1, 2 and 3 must stay equal to one another.
  for (joint = 1; joint <= 3; ++joint) mvjointsadcost[joint] = 300;
}
2139
2140 static void cal_nmvsadcosts(int *mvsadcost[2]) {
2141   /*********************************************************************
2142    * Warning: Read the comments above before modifying this function   *
2143    *********************************************************************/
2144   int i = 1;
2145
2146   mvsadcost[0][0] = 0;
2147   mvsadcost[1][0] = 0;
2148
2149   do {
2150     double z = 256 * (2 * (log2f(8 * i) + .6));
2151     mvsadcost[0][i] = (int)z;
2152     mvsadcost[1][i] = (int)z;
2153     mvsadcost[0][-i] = (int)z;
2154     mvsadcost[1][-i] = (int)z;
2155   } while (++i <= MV_MAX);
2156 }
2157
2158 static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
2159   int i = 1;
2160
2161   mvsadcost[0][0] = 0;
2162   mvsadcost[1][0] = 0;
2163
2164   do {
2165     double z = 256 * (2 * (log2f(8 * i) + .6));
2166     mvsadcost[0][i] = (int)z;
2167     mvsadcost[1][i] = (int)z;
2168     mvsadcost[0][-i] = (int)z;
2169     mvsadcost[1][-i] = (int)z;
2170   } while (++i <= MV_MAX);
2171 }
2172
2173 static void init_ref_frame_bufs(VP9_COMMON *cm) {
2174   int i;
2175   BufferPool *const pool = cm->buffer_pool;
2176   cm->new_fb_idx = INVALID_IDX;
2177   for (i = 0; i < REF_FRAMES; ++i) {
2178     cm->ref_frame_map[i] = INVALID_IDX;
2179   }
2180   for (i = 0; i < FRAME_BUFFERS; ++i) {
2181     pool->frame_bufs[i].ref_count = 0;
2182   }
2183 }
2184
2185 static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
2186                                  int subsampling_x, int subsampling_y) {
2187   VP9_COMMON *const cm = &cpi->common;
2188 #if !CONFIG_VP9_HIGHBITDEPTH
2189   (void)use_highbitdepth;
2190   assert(use_highbitdepth == 0);
2191 #endif
2192
2193   if (!cpi->initial_width ||
2194 #if CONFIG_VP9_HIGHBITDEPTH
2195       cm->use_highbitdepth != use_highbitdepth ||
2196 #endif
2197       cm->subsampling_x != subsampling_x ||
2198       cm->subsampling_y != subsampling_y) {
2199     cm->subsampling_x = subsampling_x;
2200     cm->subsampling_y = subsampling_y;
2201 #if CONFIG_VP9_HIGHBITDEPTH
2202     cm->use_highbitdepth = use_highbitdepth;
2203 #endif
2204     alloc_util_frame_buffers(cpi);
2205     cpi->initial_width = cm->width;
2206     cpi->initial_height = cm->height;
2207     cpi->initial_mbs = cm->MBs;
2208   }
2209 }
2210
2211 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2212 static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
2213                                               unsigned int *subsampling_x,
2214                                               unsigned int *subsampling_y) {
2215   switch (fmt) {
2216     case VPX_IMG_FMT_I420:
2217     case VPX_IMG_FMT_YV12:
2218     case VPX_IMG_FMT_I422:
2219     case VPX_IMG_FMT_I42016:
2220     case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
2221     default: *subsampling_x = 0; break;
2222   }
2223
2224   switch (fmt) {
2225     case VPX_IMG_FMT_I420:
2226     case VPX_IMG_FMT_I440:
2227     case VPX_IMG_FMT_YV12:
2228     case VPX_IMG_FMT_I42016:
2229     case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
2230     default: *subsampling_y = 0; break;
2231   }
2232 }
2233
2234 // TODO(angiebird): Check whether we can move this function to vpx_image.c
2235 static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
2236   return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
2237 }
2238
2239 #if CONFIG_VP9_TEMPORAL_DENOISING
2240 static void setup_denoiser_buffer(VP9_COMP *cpi) {
2241   VP9_COMMON *const cm = &cpi->common;
2242   if (cpi->oxcf.noise_sensitivity > 0 &&
2243       !cpi->denoiser.frame_buffer_initialized) {
2244     if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
2245                            cpi->oxcf.noise_sensitivity, cm->width, cm->height,
2246                            cm->subsampling_x, cm->subsampling_y,
2247 #if CONFIG_VP9_HIGHBITDEPTH
2248                            cm->use_highbitdepth,
2249 #endif
2250                            VP9_ENC_BORDER_IN_PIXELS))
2251       vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
2252                          "Failed to allocate denoiser");
2253   }
2254 }
2255 #endif
2256
2257 void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
2258   const VP9EncoderConfig *oxcf = &cpi->oxcf;
2259   unsigned int subsampling_x, subsampling_y;
2260   const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
2261   vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
2262
2263   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
2264 #if CONFIG_VP9_TEMPORAL_DENOISING
2265   setup_denoiser_buffer(cpi);
2266 #endif
2267
2268   assert(cpi->lookahead == NULL);
2269   cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
2270                                       subsampling_y,
2271 #if CONFIG_VP9_HIGHBITDEPTH
2272                                       use_highbitdepth,
2273 #endif
2274                                       oxcf->lag_in_frames);
2275   alloc_raw_frame_buffers(cpi);
2276 }
2277
2278 VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
2279                                 BufferPool *const pool) {
2280   unsigned int i;
2281   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
2282   VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
2283
2284   if (!cm) return NULL;
2285
2286   vp9_zero(*cpi);
2287
2288   if (setjmp(cm->error.jmp)) {
2289     cm->error.setjmp = 0;
2290     vp9_remove_compressor(cpi);
2291     return 0;
2292   }
2293
2294   cm->error.setjmp = 1;
2295   cm->alloc_mi = vp9_enc_alloc_mi;
2296   cm->free_mi = vp9_enc_free_mi;
2297   cm->setup_mi = vp9_enc_setup_mi;
2298
2299   CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
2300   CHECK_MEM_ERROR(
2301       cm, cm->frame_contexts,
2302       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
2303
2304   cpi->use_svc = 0;
2305   cpi->resize_state = ORIG;
2306   cpi->external_resize = 0;
2307   cpi->resize_avg_qp = 0;
2308   cpi->resize_buffer_underflow = 0;
2309   cpi->use_skin_detection = 0;
2310   cpi->common.buffer_pool = pool;
2311   init_ref_frame_bufs(cm);
2312
2313   cpi->force_update_segmentation = 0;
2314
2315   init_config(cpi, oxcf);
2316   cpi->frame_info = vp9_get_frame_info(oxcf);
2317
2318   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
2319
2320   init_frame_indexes(cm);
2321   cpi->partition_search_skippable_frame = 0;
2322   cpi->tile_data = NULL;
2323
2324   realloc_segmentation_maps(cpi);
2325
2326   CHECK_MEM_ERROR(
2327       cm, cpi->skin_map,
2328       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
2329
2330 #if !CONFIG_REALTIME_ONLY
2331   CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
2332 #endif
2333
2334   CHECK_MEM_ERROR(
2335       cm, cpi->consec_zero_mv,
2336       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
2337
2338   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
2339                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
2340   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
2341                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
2342   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
2343                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
2344   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
2345                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
2346   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
2347                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
2348   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
2349                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
2350   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
2351                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
2352   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
2353                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
2354
2355   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
2356        i++) {
2357     CHECK_MEM_ERROR(
2358         cm, cpi->mbgraph_stats[i].mb_stats,
2359         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
2360   }
2361
2362 #if CONFIG_FP_MB_STATS
2363   cpi->use_fp_mb_stats = 0;
2364   if (cpi->use_fp_mb_stats) {
2365     // a place holder used to store the first pass mb stats in the first pass
2366     CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
2367                     vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
2368   } else {
2369     cpi->twopass.frame_mb_stats_buf = NULL;
2370   }
2371 #endif
2372
2373   cpi->refresh_alt_ref_frame = 0;
2374   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
2375
2376   init_level_info(&cpi->level_info);
2377   init_level_constraint(&cpi->level_constraint);
2378
2379 #if CONFIG_INTERNAL_STATS
2380   cpi->b_calculate_blockiness = 1;
2381   cpi->b_calculate_consistency = 1;
2382   cpi->total_inconsistency = 0;
2383   cpi->psnr.worst = 100.0;
2384   cpi->worst_ssim = 100.0;
2385
2386   cpi->count = 0;
2387   cpi->bytes = 0;
2388
2389   if (cpi->b_calculate_psnr) {
2390     cpi->total_sq_error = 0;
2391     cpi->total_samples = 0;
2392
2393     cpi->totalp_sq_error = 0;
2394     cpi->totalp_samples = 0;
2395
2396     cpi->tot_recode_hits = 0;
2397     cpi->summed_quality = 0;
2398     cpi->summed_weights = 0;
2399     cpi->summedp_quality = 0;
2400     cpi->summedp_weights = 0;
2401   }
2402
2403   cpi->fastssim.worst = 100.0;
2404
2405   cpi->psnrhvs.worst = 100.0;
2406
2407   if (cpi->b_calculate_blockiness) {
2408     cpi->total_blockiness = 0;
2409     cpi->worst_blockiness = 0.0;
2410   }
2411
2412   if (cpi->b_calculate_consistency) {
2413     CHECK_MEM_ERROR(cm, cpi->ssim_vars,
2414                     vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
2415                                sizeof(*cpi->ssim_vars) * 4));
2416     cpi->worst_consistency = 100.0;
2417   } else {
2418     cpi->ssim_vars = NULL;
2419   }
2420
2421 #endif
2422
2423   cpi->first_time_stamp_ever = INT64_MAX;
2424
2425   /*********************************************************************
2426    * Warning: Read the comments around 'cal_nmvjointsadcost' and       *
2427    * 'cal_nmvsadcosts' before modifying how these tables are computed. *
2428    *********************************************************************/
2429   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
2430   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
2431   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
2432   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
2433   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
2434   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
2435
2436   cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
2437   cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
2438   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
2439   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
2440   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
2441
2442 #if CONFIG_VP9_TEMPORAL_DENOISING
2443 #ifdef OUTPUT_YUV_DENOISED
2444   yuv_denoised_file = fopen("denoised.yuv", "ab");
2445 #endif
2446 #endif
2447 #ifdef OUTPUT_YUV_SKINMAP
2448   yuv_skinmap_file = fopen("skinmap.yuv", "wb");
2449 #endif
2450 #ifdef OUTPUT_YUV_REC
2451   yuv_rec_file = fopen("rec.yuv", "wb");
2452 #endif
2453 #ifdef OUTPUT_YUV_SVC_SRC
2454   yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
2455   yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
2456   yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
2457 #endif
2458
2459 #if 0
2460   framepsnr = fopen("framepsnr.stt", "a");
2461   kf_list = fopen("kf_list.stt", "w");
2462 #endif
2463
2464   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
2465
2466 #if !CONFIG_REALTIME_ONLY
2467   if (oxcf->pass == 1) {
2468     vp9_init_first_pass(cpi);
2469   } else if (oxcf->pass == 2) {
2470     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
2471     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
2472
2473     if (cpi->svc.number_spatial_layers > 1 ||
2474         cpi->svc.number_temporal_layers > 1) {
2475       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
2476       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 };
2477       int i;
2478
2479       for (i = 0; i < oxcf->ss_number_layers; ++i) {
2480         FIRSTPASS_STATS *const last_packet_for_layer =
2481             &stats[packets - oxcf->ss_number_layers + i];
2482         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
2483         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
2484         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
2485           int num_frames;
2486           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
2487
2488           vpx_free(lc->rc_twopass_stats_in.buf);
2489
2490           lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
2491           CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
2492                           vpx_malloc(lc->rc_twopass_stats_in.sz));
2493           lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
2494           lc->twopass.stats_in = lc->twopass.stats_in_start;
2495           lc->twopass.stats_in_end =
2496               lc->twopass.stats_in_start + packets_in_layer - 1;
2497           // Note the last packet is cumulative first pass stats.
2498           // So the number of frames is packet number minus one
2499           num_frames = packets_in_layer - 1;
2500           fps_init_first_pass_info(&lc->twopass.first_pass_info,
2501                                    lc->rc_twopass_stats_in.buf, num_frames);
2502           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
2503         }
2504       }
2505
2506       for (i = 0; i < packets; ++i) {
2507         const int layer_id = (int)stats[i].spatial_layer_id;
2508         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers &&
2509             stats_copy[layer_id] != NULL) {
2510           *stats_copy[layer_id] = stats[i];
2511           ++stats_copy[layer_id];
2512         }
2513       }
2514
2515       vp9_init_second_pass_spatial_svc(cpi);
2516     } else {
2517       int num_frames;
2518 #if CONFIG_FP_MB_STATS
2519       if (cpi->use_fp_mb_stats) {
2520         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
2521         const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
2522
2523         cpi->twopass.firstpass_mb_stats.mb_stats_start =
2524             oxcf->firstpass_mb_stats_in.buf;
2525         cpi->twopass.firstpass_mb_stats.mb_stats_end =
2526             cpi->twopass.firstpass_mb_stats.mb_stats_start +
2527             (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
2528       }
2529 #endif
2530
2531       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
2532       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
2533       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
2534       // Note the last packet is cumulative first pass stats.
2535       // So the number of frames is packet number minus one
2536       num_frames = packets - 1;
2537       fps_init_first_pass_info(&cpi->twopass.first_pass_info,
2538                                oxcf->two_pass_stats_in.buf, num_frames);
2539
2540       vp9_init_second_pass(cpi);
2541     }
2542   }
2543 #endif  // !CONFIG_REALTIME_ONLY
2544
2545   cpi->mb_wiener_var_cols = 0;
2546   cpi->mb_wiener_var_rows = 0;
2547   cpi->mb_wiener_variance = NULL;
2548
2549   vp9_set_speed_features_framesize_independent(cpi, oxcf->speed);
2550   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
2551
2552   {
2553     const int bsize = BLOCK_16X16;
2554     const int w = num_8x8_blocks_wide_lookup[bsize];
2555     const int h = num_8x8_blocks_high_lookup[bsize];
2556     const int num_cols = (cm->mi_cols + w - 1) / w;
2557     const int num_rows = (cm->mi_rows + h - 1) / h;
2558     CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
2559                     vpx_calloc(num_rows * num_cols,
2560                                sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
2561   }
2562
2563   cpi->kmeans_data_arr_alloc = 0;
2564 #if CONFIG_NON_GREEDY_MV
2565   cpi->tpl_ready = 0;
2566 #endif  // CONFIG_NON_GREEDY_MV
2567   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
2568
2569   // Allocate memory to store variances for a frame.
2570   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
2571   cpi->source_var_thresh = 0;
2572   cpi->frames_till_next_var_check = 0;
2573 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
2574   cpi->fn_ptr[BT].sdf = SDF;                             \
2575   cpi->fn_ptr[BT].sdaf = SDAF;                           \
2576   cpi->fn_ptr[BT].vf = VF;                               \
2577   cpi->fn_ptr[BT].svf = SVF;                             \
2578   cpi->fn_ptr[BT].svaf = SVAF;                           \
2579   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
2580   cpi->fn_ptr[BT].sdx8f = SDX8F;
2581
2582   // TODO(angiebird): make sdx8f available for every block size
2583   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
2584       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
2585       vpx_sad32x16x4d, NULL)
2586
2587   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
2588       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
2589       vpx_sad16x32x4d, NULL)
2590
2591   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
2592       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
2593       vpx_sad64x32x4d, NULL)
2594
2595   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
2596       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
2597       vpx_sad32x64x4d, NULL)
2598
2599   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
2600       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
2601       vpx_sad32x32x4d, vpx_sad32x32x8)
2602
2603   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
2604       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
2605       vpx_sad64x64x4d, NULL)
2606
2607   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
2608       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
2609       vpx_sad16x16x4d, vpx_sad16x16x8)
2610
2611   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
2612       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
2613       vpx_sad16x8x4d, vpx_sad16x8x8)
2614
2615   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
2616       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
2617       vpx_sad8x16x4d, vpx_sad8x16x8)
2618
2619   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
2620       vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
2621       vpx_sad8x8x8)
2622
2623   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
2624       vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
2625       NULL)
2626
2627   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
2628       vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
2629       NULL)
2630
2631   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
2632       vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
2633       vpx_sad4x4x8)
2634
2635 #if CONFIG_VP9_HIGHBITDEPTH
2636   highbd_set_var_fns(cpi);
2637 #endif
2638
2639   /* vp9_init_quantizer() is first called here. Add check in
2640    * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
2641    * called later when needed. This will avoid unnecessary calls of
2642    * vp9_init_quantizer() for every frame.
2643    */
2644   vp9_init_quantizer(cpi);
2645
2646   vp9_loop_filter_init(cm);
2647
2648   // Set up the unit scaling factor used during motion search.
2649 #if CONFIG_VP9_HIGHBITDEPTH
2650   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2651                                     cm->width, cm->height,
2652                                     cm->use_highbitdepth);
2653 #else
2654   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2655                                     cm->width, cm->height);
2656 #endif  // CONFIG_VP9_HIGHBITDEPTH
2657   cpi->td.mb.me_sf = &cpi->me_sf;
2658
2659   cm->error.setjmp = 0;
2660
2661 #if CONFIG_RATE_CTRL
2662   encode_command_init(&cpi->encode_command);
2663   partition_info_init(cpi);
2664   motion_vector_info_init(cpi);
2665   fp_motion_vector_info_init(cpi);
2666 #endif
2667
2668   return cpi;
2669 }
2670
#if CONFIG_INTERNAL_STATS
// Helpers for appending formatted text to a NUL-terminated char ARRAY |H|.
// |H| must be a real array (not a pointer) because sizeof(H) supplies the
// capacity. SNPRINT_TAIL is the current end of the string and SNPRINT_ROOM
// is the number of bytes still available there.
#define SNPRINT_TAIL(H) ((H) + strlen(H))
#define SNPRINT_ROOM(H) (sizeof(H) - strlen(H))

// Appends the format string |T| (which takes no arguments) to |H|.
#define SNPRINT(H, T) snprintf(SNPRINT_TAIL(H), SNPRINT_ROOM(H), (T))

// Appends |T| formatted with the single argument |V| to |H|.
#define SNPRINT2(H, T, V) snprintf(SNPRINT_TAIL(H), SNPRINT_ROOM(H), (T), (V))
#endif  // CONFIG_INTERNAL_STATS
2677
2678 static void free_tpl_buffer(VP9_COMP *cpi);
2679
2680 void vp9_remove_compressor(VP9_COMP *cpi) {
2681   VP9_COMMON *cm;
2682   unsigned int i;
2683   int t;
2684
2685   if (!cpi) return;
2686
2687 #if CONFIG_INTERNAL_STATS
2688   vpx_free(cpi->ssim_vars);
2689 #endif
2690
2691   cm = &cpi->common;
2692   if (cm->current_video_frame > 0) {
2693 #if CONFIG_INTERNAL_STATS
2694     vpx_clear_system_state();
2695
2696     if (cpi->oxcf.pass != 1) {
2697       char headings[512] = { 0 };
2698       char results[512] = { 0 };
2699       FILE *f = fopen("opsnr.stt", "a");
2700       double time_encoded =
2701           (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
2702           10000000.000;
2703       double total_encode_time =
2704           (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
2705       const double dr =
2706           (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
2707       const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
2708       const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
2709       const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
2710
2711       if (cpi->b_calculate_psnr) {
2712         const double total_psnr = vpx_sse_to_psnr(
2713             (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
2714         const double totalp_psnr = vpx_sse_to_psnr(
2715             (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error);
2716         const double total_ssim =
2717             100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
2718         const double totalp_ssim =
2719             100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0);
2720
2721         snprintf(headings, sizeof(headings),
2722                  "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
2723                  "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
2724                  "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
2725                  "AVPsnrY\tAPsnrCb\tAPsnrCr");
2726         snprintf(results, sizeof(results),
2727                  "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2728                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2729                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2730                  "%7.3f\t%7.3f\t%7.3f",
2731                  dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
2732                  cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim,
2733                  totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count,
2734                  cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
2735                  cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
2736                  cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count,
2737                  cpi->psnr.stat[V] / cpi->count);
2738
2739         if (cpi->b_calculate_blockiness) {
2740           SNPRINT(headings, "\t  Block\tWstBlck");
2741           SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
2742           SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
2743         }
2744
2745         if (cpi->b_calculate_consistency) {
2746           double consistency =
2747               vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
2748                               (double)cpi->total_inconsistency);
2749
2750           SNPRINT(headings, "\tConsist\tWstCons");
2751           SNPRINT2(results, "\t%7.3f", consistency);
2752           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
2753         }
2754
2755         SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
2756         SNPRINT2(results, "\t%8.0f", total_encode_time);
2757         SNPRINT2(results, "\t%7.2f", rate_err);
2758         SNPRINT2(results, "\t%7.2f", fabs(rate_err));
2759
2760         fprintf(f, "%s\tAPsnr611\n", headings);
2761         fprintf(
2762             f, "%s\t%7.3f\n", results,
2763             (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
2764                 (cpi->count * 8));
2765       }
2766
2767       fclose(f);
2768     }
2769 #endif
2770
2771 #if 0
2772     {
2773       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
2774       printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
2775       printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
2776              cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
2777              cpi->time_compress_data / 1000,
2778              (cpi->time_receive_data + cpi->time_compress_data) / 1000);
2779     }
2780 #endif
2781   }
2782
2783 #if CONFIG_VP9_TEMPORAL_DENOISING
2784   vp9_denoiser_free(&(cpi->denoiser));
2785 #endif
2786
2787   if (cpi->kmeans_data_arr_alloc) {
2788 #if CONFIG_MULTITHREAD
2789     pthread_mutex_destroy(&cpi->kmeans_mutex);
2790 #endif
2791     vpx_free(cpi->kmeans_data_arr);
2792   }
2793
2794   free_tpl_buffer(cpi);
2795
2796   for (t = 0; t < cpi->num_workers; ++t) {
2797     VPxWorker *const worker = &cpi->workers[t];
2798     EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
2799
2800     // Deallocate allocated threads.
2801     vpx_get_worker_interface()->end(worker);
2802
2803     // Deallocate allocated thread data.
2804     if (t < cpi->num_workers - 1) {
2805       vpx_free(thread_data->td->counts);
2806       vp9_free_pc_tree(thread_data->td);
2807       vpx_free(thread_data->td);
2808     }
2809   }
2810   vpx_free(cpi->tile_thr_data);
2811   vpx_free(cpi->workers);
2812   vp9_row_mt_mem_dealloc(cpi);
2813
2814   if (cpi->num_workers > 1) {
2815     vp9_loop_filter_dealloc(&cpi->lf_row_sync);
2816     vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
2817   }
2818
2819 #if !CONFIG_REALTIME_ONLY
2820   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
2821 #endif
2822
2823   dealloc_compressor_data(cpi);
2824
2825   for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
2826        ++i) {
2827     vpx_free(cpi->mbgraph_stats[i].mb_stats);
2828   }
2829
2830 #if CONFIG_FP_MB_STATS
2831   if (cpi->use_fp_mb_stats) {
2832     vpx_free(cpi->twopass.frame_mb_stats_buf);
2833     cpi->twopass.frame_mb_stats_buf = NULL;
2834   }
2835 #endif
2836
2837   vp9_remove_common(cm);
2838   vp9_free_ref_frame_buffers(cm->buffer_pool);
2839 #if CONFIG_VP9_POSTPROC
2840   vp9_free_postproc_buffers(cm);
2841 #endif
2842   vpx_free(cpi);
2843
2844 #if CONFIG_VP9_TEMPORAL_DENOISING
2845 #ifdef OUTPUT_YUV_DENOISED
2846   fclose(yuv_denoised_file);
2847 #endif
2848 #endif
2849 #ifdef OUTPUT_YUV_SKINMAP
2850   fclose(yuv_skinmap_file);
2851 #endif
2852 #ifdef OUTPUT_YUV_REC
2853   fclose(yuv_rec_file);
2854 #endif
2855 #ifdef OUTPUT_YUV_SVC_SRC
2856   fclose(yuv_svc_src[0]);
2857   fclose(yuv_svc_src[1]);
2858   fclose(yuv_svc_src[2]);
2859 #endif
2860
2861 #if 0
2862
2863   if (keyfile)
2864     fclose(keyfile);
2865
2866   if (framepsnr)
2867     fclose(framepsnr);
2868
2869   if (kf_list)
2870     fclose(kf_list);
2871
2872 #endif
2873 }
2874
2875 int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
2876   if (is_psnr_calc_enabled(cpi)) {
2877 #if CONFIG_VP9_HIGHBITDEPTH
2878     vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
2879                          cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
2880 #else
2881     vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
2882 #endif
2883     return 1;
2884   } else {
2885     vp9_zero(*psnr);
2886     return 0;
2887   }
2888 }
2889
2890 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
2891   if (ref_frame_flags > 7) return -1;
2892
2893   cpi->ref_frame_flags = ref_frame_flags;
2894   return 0;
2895 }
2896
2897 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
2898   cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
2899   cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
2900   cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
2901   cpi->ext_refresh_frame_flags_pending = 1;
2902 }
2903
2904 static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
2905     VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
2906   MV_REFERENCE_FRAME ref_frame = NONE;
2907   if (ref_frame_flag == VP9_LAST_FLAG)
2908     ref_frame = LAST_FRAME;
2909   else if (ref_frame_flag == VP9_GOLD_FLAG)
2910     ref_frame = GOLDEN_FRAME;
2911   else if (ref_frame_flag == VP9_ALT_FLAG)
2912     ref_frame = ALTREF_FRAME;
2913
2914   return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
2915 }
2916
2917 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2918                            YV12_BUFFER_CONFIG *sd) {
2919   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2920   if (cfg) {
2921     vpx_yv12_copy_frame(cfg, sd);
2922     return 0;
2923   } else {
2924     return -1;
2925   }
2926 }
2927
2928 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2929                           YV12_BUFFER_CONFIG *sd) {
2930   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2931   if (cfg) {
2932     vpx_yv12_copy_frame(sd, cfg);
2933     return 0;
2934   } else {
2935     return -1;
2936   }
2937 }
2938
2939 int vp9_update_entropy(VP9_COMP *cpi, int update) {
2940   cpi->ext_refresh_frame_context = update;
2941   cpi->ext_refresh_frame_context_pending = 1;
2942   return 0;
2943 }
2944
#ifdef OUTPUT_YUV_REC
// Writes |rows| rows of |width| samples, each |bytes_per_sample| bytes wide,
// from |plane| to yuv_rec_file. |stride| is the distance between rows in
// samples. The do/while always writes at least one row, so |rows| is
// expected to be >= 1.
static void write_rec_plane_rows(const void *plane, int bytes_per_sample,
                                 int width, int stride, int rows) {
  const uint8_t *row = (const uint8_t *)plane;

  do {
    fwrite(row, width, bytes_per_sample, yuv_rec_file);
    row += stride * bytes_per_sample;
  } while (--rows);
}

// Dumps cm->frame_to_show (Y, then U, then V) to yuv_rec_file and flushes
// it. The luma plane is written with cm->height rows; the chroma planes use
// the buffer's uv_height. High-bitdepth buffers are written as 2-byte
// samples.
void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
  YV12_BUFFER_CONFIG *const frame = cm->frame_to_show;
  const void *y_plane = frame->y_buffer;
  const void *u_plane = frame->u_buffer;
  const void *v_plane = frame->v_buffer;
  int bytes_per_sample = 1;

#if CONFIG_VP9_HIGHBITDEPTH
  if (frame->flags & YV12_FLAG_HIGHBITDEPTH) {
    y_plane = CONVERT_TO_SHORTPTR(frame->y_buffer);
    u_plane = CONVERT_TO_SHORTPTR(frame->u_buffer);
    v_plane = CONVERT_TO_SHORTPTR(frame->v_buffer);
    bytes_per_sample = 2;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  write_rec_plane_rows(y_plane, bytes_per_sample, frame->y_width,
                       frame->y_stride, cm->height);
  write_rec_plane_rows(u_plane, bytes_per_sample, frame->uv_width,
                       frame->uv_stride, frame->uv_height);
  write_rec_plane_rows(v_plane, bytes_per_sample, frame->uv_width,
                       frame->uv_stride, frame->uv_height);

  fflush(yuv_rec_file);
}
#endif
3005
3006 #if CONFIG_VP9_HIGHBITDEPTH
3007 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3008                                                 YV12_BUFFER_CONFIG *dst,
3009                                                 int bd) {
3010 #else
3011 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
3012                                                 YV12_BUFFER_CONFIG *dst) {
3013 #endif  // CONFIG_VP9_HIGHBITDEPTH
3014   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
3015   int i;
3016   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
3017                                    src->v_buffer };
3018   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
3019   const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
3020                               src->uv_crop_width };
3021   const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
3022                                src->uv_crop_height };
3023   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
3024   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
3025   const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
3026                               dst->uv_crop_width };
3027   const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
3028                                dst->uv_crop_height };
3029
3030   for (i = 0; i < MAX_MB_PLANE; ++i) {
3031 #if CONFIG_VP9_HIGHBITDEPTH
3032     if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
3033       vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
3034                               src_strides[i], dsts[i], dst_heights[i],
3035                               dst_widths[i], dst_strides[i], bd);
3036     } else {
3037       vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3038                        dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3039     }
3040 #else
3041     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
3042                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
3043 #endif  // CONFIG_VP9_HIGHBITDEPTH
3044   }
3045   vpx_extend_frame_borders(dst);
3046 }
3047
#if CONFIG_VP9_HIGHBITDEPTH
// Scales |src| into |dst| with the interpolation kernel selected by
// |filter_type|, then extends |dst|'s borders. The destination is walked in
// steps of 16 luma pixels; the chroma planes use half the coordinates and
// half the block size (factor 2). |phase_scaler| is added to the 1/16-pel
// source position of every block, and |bd| is passed to the high-bitdepth
// convolve.
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                   YV12_BUFFER_CONFIG *dst, int bd,
                                   INTERP_FILTER filter_type,
                                   int phase_scaler) {
  const int src_w = src->y_crop_width;
  const int src_h = src->y_crop_height;
  const int dst_w = dst->y_crop_width;
  const int dst_h = dst->y_crop_height;
  const uint8_t *const src_planes[3] = { src->y_buffer, src->u_buffer,
                                         src->v_buffer };
  const int src_plane_strides[3] = { src->y_stride, src->uv_stride,
                                     src->uv_stride };
  uint8_t *const dst_planes[3] = { dst->y_buffer, dst->u_buffer,
                                   dst->v_buffer };
  const int dst_plane_strides[3] = { dst->y_stride, dst->uv_stride,
                                     dst->uv_stride };
  const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
  int plane, row, col;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int factor = (plane == 0 || plane == 3) ? 1 : 2;
    const int block = 16 / factor;
    const int src_stride = src_plane_strides[plane];
    const int dst_stride = dst_plane_strides[plane];

    for (row = 0; row < dst_h; row += 16) {
      const int y_q4 = row * block * src_h / dst_h + phase_scaler;
      const int src_row = (row / factor) * src_h / dst_h;

      for (col = 0; col < dst_w; col += 16) {
        const int x_q4 = col * block * src_w / dst_w + phase_scaler;
        // Step sizes in 1/16 pel. Computed here, inside both loops, so the
        // divisions only happen when dst_w and dst_h are positive.
        const int x_step_q4 = 16 * src_w / dst_w;
        const int y_step_q4 = 16 * src_h / dst_h;
        const uint8_t *src_ptr = src_planes[plane] + src_row * src_stride +
                                 (col / factor) * src_w / dst_w;
        uint8_t *dst_ptr =
            dst_planes[plane] + (row / factor) * dst_stride + (col / factor);

        if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
          vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                               CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, kernel,
                               x_q4 & 0xf, x_step_q4, y_q4 & 0xf, y_step_q4,
                               block, block, bd);
        } else {
          vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
                        x_q4 & 0xf, x_step_q4, y_q4 & 0xf, y_step_q4, block,
                        block);
        }
      }
    }
  }

  vpx_extend_frame_borders(dst);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
3096
3097 #if !CONFIG_REALTIME_ONLY
3098 static int scale_down(VP9_COMP *cpi, int q) {
3099   RATE_CONTROL *const rc = &cpi->rc;
3100   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3101   int scale = 0;
3102   assert(frame_is_kf_gf_arf(cpi));
3103
3104   if (rc->frame_size_selector == UNSCALED &&
3105       q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
3106     const int max_size_thresh =
3107         (int)(rate_thresh_mult[SCALE_STEP1] *
3108               VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
3109     scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
3110   }
3111   return scale;
3112 }
3113
3114 static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
3115   const RATE_CONTROL *const rc = &cpi->rc;
3116   int big_miss_high;
3117
3118   if (frame_is_kf_gf_arf(cpi))
3119     big_miss_high = rc->this_frame_target * 3 / 2;
3120   else
3121     big_miss_high = rc->this_frame_target * 2;
3122
3123   return big_miss_high;
3124 }
3125
3126 static int big_rate_miss(VP9_COMP *cpi) {
3127   const RATE_CONTROL *const rc = &cpi->rc;
3128   int big_miss_high;
3129   int big_miss_low;
3130
3131   // Ignore for overlay frames
3132   if (rc->is_src_frame_alt_ref) {
3133     return 0;
3134   } else {
3135     big_miss_low = (rc->this_frame_target / 2);
3136     big_miss_high = big_rate_miss_high_threshold(cpi);
3137
3138     return (rc->projected_frame_size > big_miss_high) ||
3139            (rc->projected_frame_size < big_miss_low);
3140   }
3141 }
3142
3143 // test in two pass for the first
3144 static int two_pass_first_group_inter(VP9_COMP *cpi) {
3145   if (cpi->oxcf.pass == 2) {
3146     TWO_PASS *const twopass = &cpi->twopass;
3147     GF_GROUP *const gf_group = &twopass->gf_group;
3148     const int gfg_index = gf_group->index;
3149
3150     if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
3151     return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
3152            gf_group->update_type[gfg_index] == LF_UPDATE;
3153   } else {
3154     return 0;
3155   }
3156 }
3157
3158 // Function to test for conditions that indicate we should loop
3159 // back and recode a frame.
3160 static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
3161                             int maxq, int minq) {
3162   const RATE_CONTROL *const rc = &cpi->rc;
3163   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
3164   const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
3165   int force_recode = 0;
3166
3167   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3168       big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) ||
3169       (two_pass_first_group_inter(cpi) &&
3170        (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) ||
3171       (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) {
3172     if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
3173         scale_down(cpi, q)) {
3174       // Code this group at a lower resolution.
3175       cpi->resize_pending = 1;
3176       return 1;
3177     }
3178
3179     // Force recode for extreme overshoot.
3180     if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3181         (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
3182          rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) {
3183       return 1;
3184     }
3185
3186     // TODO(agrange) high_limit could be greater than the scale-down threshold.
3187     if ((rc->projected_frame_size > high_limit && q < maxq) ||
3188         (rc->projected_frame_size < low_limit && q > minq)) {
3189       force_recode = 1;
3190     } else if (cpi->oxcf.rc_mode == VPX_CQ) {
3191       // Deal with frame undershoot and whether or not we are
3192       // below the automatically set cq level.
3193       if (q > oxcf->cq_level &&
3194           rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
3195         force_recode = 1;
3196       }
3197     }
3198   }
3199   return force_recode;
3200 }
3201 #endif  // !CONFIG_REALTIME_ONLY
3202
3203 static void update_ref_frames(VP9_COMP *cpi) {
3204   VP9_COMMON *const cm = &cpi->common;
3205   BufferPool *const pool = cm->buffer_pool;
3206   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3207
3208   if (cpi->rc.show_arf_as_gld) {
3209     int tmp = cpi->alt_fb_idx;
3210     cpi->alt_fb_idx = cpi->gld_fb_idx;
3211     cpi->gld_fb_idx = tmp;
3212   } else if (cm->show_existing_frame) {
3213     // Pop ARF.
3214     cpi->lst_fb_idx = cpi->alt_fb_idx;
3215     cpi->alt_fb_idx =
3216         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3217     --gf_group->stack_size;
3218   }
3219
3220   // At this point the new frame has been encoded.
3221   // If any buffer copy / swapping is signaled it should be done here.
3222   if (cm->frame_type == KEY_FRAME) {
3223     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3224                cm->new_fb_idx);
3225     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3226                cm->new_fb_idx);
3227   } else if (vp9_preserve_existing_gf(cpi)) {
3228     // We have decided to preserve the previously existing golden frame as our
3229     // new ARF frame. However, in the short term in function
3230     // vp9_get_refresh_mask() we left it in the GF slot and, if
3231     // we're updating the GF with the current decoded frame, we save it to the
3232     // ARF slot instead.
3233     // We now have to update the ARF with the current frame and swap gld_fb_idx
3234     // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
3235     // slot and, if we're updating the GF, the current frame becomes the new GF.
3236     int tmp;
3237
3238     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3239                cm->new_fb_idx);
3240
3241     tmp = cpi->alt_fb_idx;
3242     cpi->alt_fb_idx = cpi->gld_fb_idx;
3243     cpi->gld_fb_idx = tmp;
3244   } else { /* For non key/golden frames */
3245     if (cpi->refresh_alt_ref_frame) {
3246       int arf_idx = gf_group->top_arf_idx;
3247
3248       // Push new ARF into stack.
3249       stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
3250                  gf_group->stack_size);
3251       ++gf_group->stack_size;
3252
3253       assert(arf_idx < REF_FRAMES);
3254
3255       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
3256       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
3257              cpi->interp_filter_selected[0],
3258              sizeof(cpi->interp_filter_selected[0]));
3259
3260       cpi->alt_fb_idx = arf_idx;
3261     }
3262
3263     if (cpi->refresh_golden_frame) {
3264       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3265                  cm->new_fb_idx);
3266       if (!cpi->rc.is_src_frame_alt_ref)
3267         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3268                cpi->interp_filter_selected[0],
3269                sizeof(cpi->interp_filter_selected[0]));
3270       else
3271         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3272                cpi->interp_filter_selected[ALTREF_FRAME],
3273                sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
3274     }
3275   }
3276
3277   if (cpi->refresh_last_frame) {
3278     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
3279                cm->new_fb_idx);
3280     if (!cpi->rc.is_src_frame_alt_ref)
3281       memcpy(cpi->interp_filter_selected[LAST_FRAME],
3282              cpi->interp_filter_selected[0],
3283              sizeof(cpi->interp_filter_selected[0]));
3284   }
3285
3286   if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
3287     cpi->alt_fb_idx =
3288         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3289     --gf_group->stack_size;
3290   }
3291 }
3292
3293 void vp9_update_reference_frames(VP9_COMP *cpi) {
3294   update_ref_frames(cpi);
3295
3296 #if CONFIG_VP9_TEMPORAL_DENOISING
3297   vp9_denoiser_update_ref_frame(cpi);
3298 #endif
3299
3300   if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
3301 }
3302
3303 static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
3304   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
3305   struct loopfilter *lf = &cm->lf;
3306   int is_reference_frame =
3307       (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
3308        cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
3309   if (cpi->use_svc &&
3310       cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
3311     is_reference_frame = !cpi->svc.non_reference_frame;
3312
3313   // Skip loop filter in show_existing_frame mode.
3314   if (cm->show_existing_frame) {
3315     lf->filter_level = 0;
3316     return;
3317   }
3318
3319   if (xd->lossless) {
3320     lf->filter_level = 0;
3321     lf->last_filt_level = 0;
3322   } else {
3323     struct vpx_usec_timer timer;
3324
3325     vpx_clear_system_state();
3326
3327     vpx_usec_timer_start(&timer);
3328
3329     if (!cpi->rc.is_src_frame_alt_ref) {
3330       if ((cpi->common.frame_type == KEY_FRAME) &&
3331           (!cpi->rc.this_key_frame_forced)) {
3332         lf->last_filt_level = 0;
3333       }
3334       vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
3335       lf->last_filt_level = lf->filter_level;
3336     } else {
3337       lf->filter_level = 0;
3338     }
3339
3340     vpx_usec_timer_mark(&timer);
3341     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
3342   }
3343
3344   if (lf->filter_level > 0 && is_reference_frame) {
3345     vp9_build_mask_frame(cm, lf->filter_level, 0);
3346
3347     if (cpi->num_workers > 1)
3348       vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
3349                                lf->filter_level, 0, 0, cpi->workers,
3350                                cpi->num_workers, &cpi->lf_row_sync);
3351     else
3352       vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
3353   }
3354
3355   vpx_extend_frame_inner_borders(cm->frame_to_show);
3356 }
3357
3358 static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
3359   RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
3360   if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
3361       new_fb_ptr->mi_cols < cm->mi_cols) {
3362     vpx_free(new_fb_ptr->mvs);
3363     CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
3364                     (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
3365                                          sizeof(*new_fb_ptr->mvs)));
3366     new_fb_ptr->mi_rows = cm->mi_rows;
3367     new_fb_ptr->mi_cols = cm->mi_cols;
3368   }
3369 }
3370
3371 void vp9_scale_references(VP9_COMP *cpi) {
3372   VP9_COMMON *cm = &cpi->common;
3373   MV_REFERENCE_FRAME ref_frame;
3374   const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG,
3375                                      VP9_ALT_FLAG };
3376
3377   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3378     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
3379     if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
3380       BufferPool *const pool = cm->buffer_pool;
3381       const YV12_BUFFER_CONFIG *const ref =
3382           get_ref_frame_buffer(cpi, ref_frame);
3383
3384       if (ref == NULL) {
3385         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3386         continue;
3387       }
3388
3389 #if CONFIG_VP9_HIGHBITDEPTH
3390       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3391         RefCntBuffer *new_fb_ptr = NULL;
3392         int force_scaling = 0;
3393         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3394         if (new_fb == INVALID_IDX) {
3395           new_fb = get_free_fb(cm);
3396           force_scaling = 1;
3397         }
3398         if (new_fb == INVALID_IDX) return;
3399         new_fb_ptr = &pool->frame_bufs[new_fb];
3400         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3401             new_fb_ptr->buf.y_crop_height != cm->height) {
3402           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3403                                        cm->subsampling_x, cm->subsampling_y,
3404                                        cm->use_highbitdepth,
3405                                        VP9_ENC_BORDER_IN_PIXELS,
3406                                        cm->byte_alignment, NULL, NULL, NULL))
3407             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3408                                "Failed to allocate frame buffer");
3409           scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth,
3410                                  EIGHTTAP, 0);
3411           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3412           alloc_frame_mvs(cm, new_fb);
3413         }
3414 #else
3415       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3416         RefCntBuffer *new_fb_ptr = NULL;
3417         int force_scaling = 0;
3418         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3419         if (new_fb == INVALID_IDX) {
3420           new_fb = get_free_fb(cm);
3421           force_scaling = 1;
3422         }
3423         if (new_fb == INVALID_IDX) return;
3424         new_fb_ptr = &pool->frame_bufs[new_fb];
3425         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3426             new_fb_ptr->buf.y_crop_height != cm->height) {
3427           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3428                                        cm->subsampling_x, cm->subsampling_y,
3429                                        VP9_ENC_BORDER_IN_PIXELS,
3430                                        cm->byte_alignment, NULL, NULL, NULL))
3431             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3432                                "Failed to allocate frame buffer");
3433           vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
3434           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3435           alloc_frame_mvs(cm, new_fb);
3436         }
3437 #endif  // CONFIG_VP9_HIGHBITDEPTH
3438       } else {
3439         int buf_idx;
3440         RefCntBuffer *buf = NULL;
3441         if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3442           // Check for release of scaled reference.
3443           buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
3444           if (buf_idx != INVALID_IDX) {
3445             buf = &pool->frame_bufs[buf_idx];
3446             --buf->ref_count;
3447             cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3448           }
3449         }
3450         buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3451         buf = &pool->frame_bufs[buf_idx];
3452         buf->buf.y_crop_width = ref->y_crop_width;
3453         buf->buf.y_crop_height = ref->y_crop_height;
3454         cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
3455         ++buf->ref_count;
3456       }
3457     } else {
3458       if (cpi->oxcf.pass != 0 || cpi->use_svc)
3459         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3460     }
3461   }
3462 }
3463
3464 static void release_scaled_references(VP9_COMP *cpi) {
3465   VP9_COMMON *cm = &cpi->common;
3466   int i;
3467   if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3468     // Only release scaled references under certain conditions:
3469     // if reference will be updated, or if scaled reference has same resolution.
3470     int refresh[3];
3471     refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
3472     refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
3473     refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
3474     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3475       const int idx = cpi->scaled_ref_idx[i - 1];
3476       if (idx != INVALID_IDX) {
3477         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3478         const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
3479         if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
3480                                buf->buf.y_crop_height == ref->y_crop_height)) {
3481           --buf->ref_count;
3482           cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
3483         }
3484       }
3485     }
3486   } else {
3487     for (i = 0; i < REFS_PER_FRAME; ++i) {
3488       const int idx = cpi->scaled_ref_idx[i];
3489       if (idx != INVALID_IDX) {
3490         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3491         --buf->ref_count;
3492         cpi->scaled_ref_idx[i] = INVALID_IDX;
3493       }
3494     }
3495   }
3496 }
3497
3498 static void full_to_model_count(unsigned int *model_count,
3499                                 unsigned int *full_count) {
3500   int n;
3501   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
3502   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
3503   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
3504   for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
3505     model_count[TWO_TOKEN] += full_count[n];
3506   model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
3507 }
3508
3509 static void full_to_model_counts(vp9_coeff_count_model *model_count,
3510                                  vp9_coeff_count *full_count) {
3511   int i, j, k, l;
3512
3513   for (i = 0; i < PLANE_TYPES; ++i)
3514     for (j = 0; j < REF_TYPES; ++j)
3515       for (k = 0; k < COEF_BANDS; ++k)
3516         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
3517           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
3518 }
3519
3520 #if 0 && CONFIG_INTERNAL_STATS
3521 static void output_frame_level_debug_stats(VP9_COMP *cpi) {
3522   VP9_COMMON *const cm = &cpi->common;
3523   FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
3524   int64_t recon_err;
3525
3526   vpx_clear_system_state();
3527
3528 #if CONFIG_VP9_HIGHBITDEPTH
3529   if (cm->use_highbitdepth) {
3530     recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3531   } else {
3532     recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3533   }
3534 #else
3535   recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3536 #endif  // CONFIG_VP9_HIGHBITDEPTH
3537
3538
3539   if (cpi->twopass.total_left_stats.coded_error != 0.0) {
3540     double dc_quant_devisor;
3541 #if CONFIG_VP9_HIGHBITDEPTH
3542     switch (cm->bit_depth) {
3543       case VPX_BITS_8:
3544         dc_quant_devisor = 4.0;
3545         break;
3546       case VPX_BITS_10:
3547         dc_quant_devisor = 16.0;
3548         break;
3549       default:
3550         assert(cm->bit_depth == VPX_BITS_12);
3551         dc_quant_devisor = 64.0;
3552         break;
3553     }
3554 #else
3555     dc_quant_devisor = 4.0;
3556 #endif
3557
3558     if (!cm->current_video_frame) {
3559       fprintf(f, "frame, width, height, last ts, last end ts, "
3560           "source_alt_ref_pending, source_alt_ref_active, "
3561           "this_frame_target, projected_frame_size, "
3562           "projected_frame_size / MBs, "
3563           "projected_frame_size - this_frame_target, "
3564           "vbr_bits_off_target, vbr_bits_off_target_fast, "
3565           "twopass.extend_minq, twopass.extend_minq_fast, "
3566           "total_target_vs_actual, "
3567           "starting_buffer_level - bits_off_target, "
3568           "total_actual_bits, base_qindex, q for base_qindex, "
3569           "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
3570           "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
3571           "frame_type, gfu_boost, "
3572           "twopass.bits_left, "
3573           "twopass.total_left_stats.coded_error, "
3574           "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
3575           "tot_recode_hits, recon_err, kf_boost, "
3576           "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
3577           "filter_level, seg.aq_av_offset\n");
3578     }
3579
3580     fprintf(f, "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
3581         "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
3582         "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
3583         "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
3584         "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
3585         cpi->common.current_video_frame,
3586         cm->width, cm->height,
3587         cpi->last_time_stamp_seen,
3588         cpi->last_end_time_stamp_seen,
3589         cpi->rc.source_alt_ref_pending,
3590         cpi->rc.source_alt_ref_active,
3591         cpi->rc.this_frame_target,
3592         cpi->rc.projected_frame_size,
3593         cpi->rc.projected_frame_size / cpi->common.MBs,
3594         (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
3595         cpi->rc.vbr_bits_off_target,
3596         cpi->rc.vbr_bits_off_target_fast,
3597         cpi->twopass.extend_minq,
3598         cpi->twopass.extend_minq_fast,
3599         cpi->rc.total_target_vs_actual,
3600         (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
3601         cpi->rc.total_actual_bits, cm->base_qindex,
3602         vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
3603         (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
3604             dc_quant_devisor,
3605         vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
3606                                 cm->bit_depth),
3607         cpi->rc.avg_q,
3608         vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
3609         cpi->refresh_last_frame, cpi->refresh_golden_frame,
3610         cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
3611         cpi->twopass.bits_left,
3612         cpi->twopass.total_left_stats.coded_error,
3613         cpi->twopass.bits_left /
3614             (1 + cpi->twopass.total_left_stats.coded_error),
3615         cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
3616         cpi->twopass.kf_zeromotion_pct,
3617         cpi->twopass.fr_content_type,
3618         cm->lf.filter_level,
3619         cm->seg.aq_av_offset);
3620   }
3621   fclose(f);
3622
3623   if (0) {
3624     FILE *const fmodes = fopen("Modes.stt", "a");
3625     int i;
3626
3627     fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
3628             cm->frame_type, cpi->refresh_golden_frame,
3629             cpi->refresh_alt_ref_frame);
3630
3631     for (i = 0; i < MAX_MODES; ++i)
3632       fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
3633
3634     fprintf(fmodes, "\n");
3635
3636     fclose(fmodes);
3637   }
3638 }
3639 #endif
3640
3641 static void set_mv_search_params(VP9_COMP *cpi) {
3642   const VP9_COMMON *const cm = &cpi->common;
3643   const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
3644
3645   // Default based on max resolution.
3646   cpi->mv_step_param = vp9_init_search_range(max_mv_def);
3647
3648   if (cpi->sf.mv.auto_mv_step_size) {
3649     if (frame_is_intra_only(cm)) {
3650       // Initialize max_mv_magnitude for use in the first INTER frame
3651       // after a key/intra-only frame.
3652       cpi->max_mv_magnitude = max_mv_def;
3653     } else {
3654       if (cm->show_frame) {
3655         // Allow mv_steps to correspond to twice the max mv magnitude found
3656         // in the previous frame, capped by the default max_mv_magnitude based
3657         // on resolution.
3658         cpi->mv_step_param = vp9_init_search_range(
3659             VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
3660       }
3661       cpi->max_mv_magnitude = 0;
3662     }
3663   }
3664 }
3665
3666 static void set_size_independent_vars(VP9_COMP *cpi) {
3667   vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
3668   vp9_set_rd_speed_thresholds(cpi);
3669   vp9_set_rd_speed_thresholds_sub8x8(cpi);
3670   cpi->common.interp_filter = cpi->sf.default_interp_filter;
3671 }
3672
3673 static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
3674                                     int *top_index) {
3675   VP9_COMMON *const cm = &cpi->common;
3676
3677   // Setup variables that depend on the dimensions of the frame.
3678   vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
3679
3680   // Decide q and q bounds.
3681   *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
3682
3683   if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
3684     *q = cpi->rc.worst_quality;
3685     cpi->rc.force_max_q = 0;
3686   }
3687
3688   if (!frame_is_intra_only(cm)) {
3689     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
3690   }
3691
3692 #if !CONFIG_REALTIME_ONLY
3693   // Configure experimental use of segmentation for enhanced coding of
3694   // static regions if indicated.
3695   // Only allowed in the second pass of a two pass encode, as it requires
3696   // lagged coding, and if the relevant speed feature flag is set.
3697   if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
3698     configure_static_seg_features(cpi);
3699 #endif  // !CONFIG_REALTIME_ONLY
3700
3701 #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
3702   if (cpi->oxcf.noise_sensitivity > 0) {
3703     int l = 0;
3704     switch (cpi->oxcf.noise_sensitivity) {
3705       case 1: l = 20; break;
3706       case 2: l = 40; break;
3707       case 3: l = 60; break;
3708       case 4:
3709       case 5: l = 100; break;
3710       case 6: l = 150; break;
3711     }
3712     if (!cpi->common.postproc_state.limits) {
3713       cpi->common.postproc_state.limits =
3714           vpx_calloc(cpi->un_scaled_source->y_width,
3715                      sizeof(*cpi->common.postproc_state.limits));
3716     }
3717     vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
3718                 cpi->common.postproc_state.limits);
3719   }
3720 #endif  // CONFIG_VP9_POSTPROC
3721 }
3722
3723 static void init_motion_estimation(VP9_COMP *cpi) {
3724   int y_stride = cpi->scaled_source.y_stride;
3725
3726   if (cpi->sf.mv.search_method == NSTEP) {
3727     vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
3728   } else if (cpi->sf.mv.search_method == DIAMOND) {
3729     vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
3730   }
3731 }
3732
3733 static void set_frame_size(VP9_COMP *cpi) {
3734   int ref_frame;
3735   VP9_COMMON *const cm = &cpi->common;
3736   VP9EncoderConfig *const oxcf = &cpi->oxcf;
3737   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
3738
3739 #if !CONFIG_REALTIME_ONLY
3740   if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
3741       ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
3742        (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
3743     calculate_coded_size(cpi, &oxcf->scaled_frame_width,
3744                          &oxcf->scaled_frame_height);
3745
3746     // There has been a change in frame size.
3747     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3748                          oxcf->scaled_frame_height);
3749   }
3750 #endif  // !CONFIG_REALTIME_ONLY
3751
3752   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
3753       oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
3754     // For SVC scaled width/height will have been set (svc->resize_set=1)
3755     // in get_svc_params based on the layer width/height.
3756     if (!cpi->use_svc || !cpi->svc.resize_set) {
3757       oxcf->scaled_frame_width =
3758           (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
3759       oxcf->scaled_frame_height =
3760           (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den;
3761       // There has been a change in frame size.
3762       vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3763                            oxcf->scaled_frame_height);
3764     }
3765
3766     // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
3767     set_mv_search_params(cpi);
3768
3769     vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
3770 #if CONFIG_VP9_TEMPORAL_DENOISING
3771     // Reset the denoiser on the resized frame.
3772     if (cpi->oxcf.noise_sensitivity > 0) {
3773       vp9_denoiser_free(&(cpi->denoiser));
3774       setup_denoiser_buffer(cpi);
3775       // Dynamic resize is only triggered for non-SVC, so we can force
3776       // golden frame update here as temporary fix to denoiser.
3777       cpi->refresh_golden_frame = 1;
3778     }
3779 #endif
3780   }
3781
3782   if ((oxcf->pass == 2) && !cpi->use_svc) {
3783     vp9_set_target_rate(cpi);
3784   }
3785
3786   alloc_frame_mvs(cm, cm->new_fb_idx);
3787
3788   // Reset the frame pointers to the current frame size.
3789   if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
3790                                cm->subsampling_x, cm->subsampling_y,
3791 #if CONFIG_VP9_HIGHBITDEPTH
3792                                cm->use_highbitdepth,
3793 #endif
3794                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
3795                                NULL, NULL, NULL))
3796     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3797                        "Failed to allocate frame buffer");
3798
3799   alloc_util_frame_buffers(cpi);
3800   init_motion_estimation(cpi);
3801
3802   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3803     RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
3804     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3805
3806     ref_buf->idx = buf_idx;
3807
3808     if (buf_idx != INVALID_IDX) {
3809       YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
3810       ref_buf->buf = buf;
3811 #if CONFIG_VP9_HIGHBITDEPTH
3812       vp9_setup_scale_factors_for_frame(
3813           &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
3814           cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
3815 #else
3816       vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
3817                                         buf->y_crop_height, cm->width,
3818                                         cm->height);
3819 #endif  // CONFIG_VP9_HIGHBITDEPTH
3820       if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
3821     } else {
3822       ref_buf->buf = NULL;
3823     }
3824   }
3825
3826   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
3827 }
3828
3829 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3830 static void save_encode_params(VP9_COMP *cpi) {
3831   VP9_COMMON *const cm = &cpi->common;
3832   const int tile_cols = 1 << cm->log2_tile_cols;
3833   const int tile_rows = 1 << cm->log2_tile_rows;
3834   int tile_col, tile_row;
3835   int i, j;
3836   RD_OPT *rd_opt = &cpi->rd;
3837   for (i = 0; i < MAX_REF_FRAMES; i++) {
3838     for (j = 0; j < REFERENCE_MODES; j++)
3839       rd_opt->prediction_type_threshes_prev[i][j] =
3840           rd_opt->prediction_type_threshes[i][j];
3841
3842     for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
3843       rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
3844   }
3845
3846   if (cpi->tile_data != NULL) {
3847     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
3848       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
3849         TileDataEnc *tile_data =
3850             &cpi->tile_data[tile_row * tile_cols + tile_col];
3851         for (i = 0; i < BLOCK_SIZES; ++i) {
3852           for (j = 0; j < MAX_MODES; ++j) {
3853             tile_data->thresh_freq_fact_prev[i][j] =
3854                 tile_data->thresh_freq_fact[i][j];
3855           }
3856         }
3857       }
3858   }
3859 }
3860 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
3861
3862 static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
3863 #ifdef ENABLE_KF_DENOISE
3864   if (is_spatial_denoise_enabled(cpi)) {
3865     cpi->raw_source_frame = vp9_scale_if_required(
3866         cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3867         (oxcf->pass == 0), EIGHTTAP, 0);
3868   } else {
3869     cpi->raw_source_frame = cpi->Source;
3870   }
3871 #else
3872   cpi->raw_source_frame = cpi->Source;
3873 #endif
3874 }
3875
// Encodes the current frame once, without the iterative rate-control recode
// loop.
//
// Steps, in order: select/scale the source (with a two-stage shortcut for the
// SVC 1/4x1/4 downscale), decide whether source-SAD analysis is valid, run
// one-pass scene detection, possibly drop the frame (1-pass CBR), scale the
// references, pick q, set up AQ / ROI / active maps and call
// vp9_encode_frame(). When sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ and
// a high source SAD was flagged, the frame is trial-packed to estimate its
// size and, if vp9_encodedframe_overshoot() asks for it, re-encoded once at
// the q that function selects.
//
// cpi:  encoder instance; many of its fields are updated in place.
// size: receives the packed size from vp9_pack_bitstream(); only written on
//       the RE_ENCODE_MAXQ trial-pack path.
// dest: output buffer handed to vp9_pack_bitstream() on that same path.
//
// Returns 0 if vp9_rc_drop_frame() decided to drop the frame, 1 otherwise
// (including the show_existing_frame early exit).
//
// NOTE(review): the resize test below checks both `cpi->resize_state` and
// `cpi->resize_state != ORIG`; these look redundant if ORIG is 0 - confirm.
3876 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
3877                                       uint8_t *dest) {
3878   VP9_COMMON *const cm = &cpi->common;
3879   SVC *const svc = &cpi->svc;
3880   int q = 0, bottom_index = 0, top_index = 0;
3881   int no_drop_scene_change = 0;
3882   const INTERP_FILTER filter_scaler =
3883       (is_one_pass_cbr_svc(cpi))
3884           ? svc->downsample_filter_type[svc->spatial_layer_id]
3885           : EIGHTTAP;
3886   const int phase_scaler =
3887       (is_one_pass_cbr_svc(cpi))
3888           ? svc->downsample_filter_phase[svc->spatial_layer_id]
3889           : 0;
3890
3891   if (cm->show_existing_frame) {
3892     cpi->rc.this_frame_target = 0;
3893     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
3894     return 1;
3895   }
3896
3897   svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
3898
3899   // Flag to check if it's valid to compute the source sad (used for
3900   // scene detection and for superblock content state in CBR mode).
3901   // The flag may get reset below based on SVC or resizing state.
3902   cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;
3903
3904   vpx_clear_system_state();
3905
3906   set_frame_size(cpi);
3907
3908   if (is_one_pass_cbr_svc(cpi) &&
3909       cpi->un_scaled_source->y_width == cm->width << 2 &&
3910       cpi->un_scaled_source->y_height == cm->height << 2 &&
3911       svc->scaled_temp.y_width == cm->width << 1 &&
3912       svc->scaled_temp.y_height == cm->height << 1) {
3913     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
3914     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
3915     // result will be saved in scaled_temp and might be used later.
3916     const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
3917     const int phase_scaler2 = svc->downsample_filter_phase[1];
3918     cpi->Source = vp9_svc_twostage_scale(
3919         cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
3920         filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
3921     svc->scaled_one_half = 1;
3922   } else if (is_one_pass_cbr_svc(cpi) &&
3923              cpi->un_scaled_source->y_width == cm->width << 1 &&
3924              cpi->un_scaled_source->y_height == cm->height << 1 &&
3925              svc->scaled_one_half) {
3926     // If the spatial layer is 1/2x1/2 and the scaling is already done in the
3927     // two-stage scaling, use the result directly.
3928     cpi->Source = &svc->scaled_temp;
3929     svc->scaled_one_half = 0;
3930   } else {
3931     cpi->Source = vp9_scale_if_required(
3932         cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
3933         filter_scaler, phase_scaler);
3934   }
3935 #ifdef OUTPUT_YUV_SVC_SRC
3936   // Write out at most 3 spatial layers.
3937   if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
3938     vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
3939   }
3940 #endif
3941   // Unfiltered raw source used in metrics calculation if the source
3942   // has been filtered.
3943   if (is_psnr_calc_enabled(cpi)) {
3944 #ifdef ENABLE_KF_DENOISE
3945     if (is_spatial_denoise_enabled(cpi)) {
3946       cpi->raw_source_frame = vp9_scale_if_required(
3947           cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3948           (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
3949     } else {
3950       cpi->raw_source_frame = cpi->Source;
3951     }
3952 #else
3953     cpi->raw_source_frame = cpi->Source;
3954 #endif
3955   }
3956
3957   if ((cpi->use_svc &&
3958        (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
3959         svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
3960         svc->current_superframe < 1)) ||
3961       cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
3962       cpi->resize_state != ORIG) {
3963     cpi->compute_source_sad_onepass = 0;
3964     if (cpi->content_state_sb_fd != NULL)
3965       memset(cpi->content_state_sb_fd, 0,
3966              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
3967                  sizeof(*cpi->content_state_sb_fd));
3968   }
3969
3970   // Avoid scaling last_source unless it's needed.
3971   // Last source is needed if avg_source_sad() is used, or if
3972   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
3973   // estimation is enabled.
3974   if (cpi->unscaled_last_source != NULL &&
3975       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3976        (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
3977         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
3978        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
3979        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
3980        cpi->compute_source_sad_onepass))
3981     cpi->Last_Source = vp9_scale_if_required(
3982         cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
3983         (cpi->oxcf.pass == 0), EIGHTTAP, 0);
3984
3985   if (cpi->Last_Source == NULL ||
3986       cpi->Last_Source->y_width != cpi->Source->y_width ||
3987       cpi->Last_Source->y_height != cpi->Source->y_height)
3988     cpi->compute_source_sad_onepass = 0;
3989
3990   if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
3991     memset(cpi->consec_zero_mv, 0,
3992            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
3993   }
3994
3995 #if CONFIG_VP9_TEMPORAL_DENOISING
3996   if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
3997     vp9_denoiser_reset_on_first_frame(cpi);
3998 #endif
3999
4000   // Scene detection is always used for VBR mode or screen-content case.
4001   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
4002   // (need to check encoding time cost for doing this for speed 8).
4003   cpi->rc.high_source_sad = 0;
4004   cpi->rc.hybrid_intra_scene_change = 0;
4005   cpi->rc.re_encode_maxq_scene_change = 0;
4006   if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
4007       (cpi->oxcf.rc_mode == VPX_VBR ||
4008        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
4009        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
4010     vp9_scene_detection_onepass(cpi);
4011
4012   if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
4013     svc->high_source_sad_superframe = cpi->rc.high_source_sad;
4014     svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
4015     // On scene change reset temporal layer pattern to TL0.
4016     // Note that if the base/lower spatial layers are skipped: instead of
4017     // inserting base layer here, we force max-q for the next superframe
4018     // with lower spatial layers: this is done in vp9_encodedframe_overshoot()
4019     // when max-q is decided for the current layer.
4020     // Only do this reset for bypass/flexible mode.
4021     if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
4022         svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
4023       // rc->high_source_sad will get reset so copy it to restore it.
4024       int tmp_high_source_sad = cpi->rc.high_source_sad;
4025       vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
4026       cpi->rc.high_source_sad = tmp_high_source_sad;
4027     }
4028   }
4029
4030   vp9_update_noise_estimate(cpi);
4031
4032   // For 1 pass CBR, check if we are dropping this frame.
4033   // Never drop on key frame, if base layer is key for svc,
4034   // on scene change, or if superframe has layer sync.
4035   if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
4036       !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
4037     no_drop_scene_change = 1;
4038   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
4039       !frame_is_intra_only(cm) && !no_drop_scene_change &&
4040       !svc->superframe_has_layer_sync &&
4041       (!cpi->use_svc ||
4042        !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
4043     if (vp9_rc_drop_frame(cpi)) return 0;
4044   }
4045
4046   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
4047   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
4048   // avoid this frame-level upsampling (for non intra_only frames).
4049   // For SVC single_layer mode, dynamic resize is allowed and we need to
4050   // scale references for this case.
4051   if (frame_is_intra_only(cm) == 0 &&
4052       ((svc->single_layer_svc && cpi->oxcf.resize_mode == RESIZE_DYNAMIC) ||
4053        !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref))) {
4054     vp9_scale_references(cpi);
4055   }
4056
4057   set_size_independent_vars(cpi);
4058   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4059
4060   // search method and step parameter might be changed in speed settings.
4061   init_motion_estimation(cpi);
4062
4063   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
4064
4065   if (cpi->sf.svc_use_lowres_part &&
4066       svc->spatial_layer_id == svc->number_spatial_layers - 2) {
4067     if (svc->prev_partition_svc == NULL) {
4068       CHECK_MEM_ERROR(
4069           cm, svc->prev_partition_svc,
4070           (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
4071                                    sizeof(*svc->prev_partition_svc)));
4072     }
4073   }
4074
4075   // TODO(jianj): Look into issue of skin detection with high bitdepth.
4076   if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
4077       cpi->oxcf.rc_mode == VPX_CBR &&
4078       cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
4079       cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4080     cpi->use_skin_detection = 1;
4081   }
4082
4083   // Enable post encode frame dropping for CBR on non key frame, when
4084   // ext_use_post_encode_drop is specified by user.
4085   cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
4086                                  cpi->oxcf.rc_mode == VPX_CBR &&
4087                                  cm->frame_type != KEY_FRAME;
4088
4089   vp9_set_quantizer(cpi, q);
4090   vp9_set_variance_partition_thresholds(cpi, q, 0);
4091
4092   setup_frame(cpi);
4093
4094   suppress_active_map(cpi);
4095
4096   if (cpi->use_svc) {
4097     // On non-zero spatial layer, check for disabling inter-layer
4098     // prediction.
4099     if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
4100     vp9_svc_assert_constraints_pattern(cpi);
4101   }
4102
4103   if (cpi->rc.last_post_encode_dropped_scene_change) {
4104     cpi->rc.high_source_sad = 1;
4105     svc->high_source_sad_superframe = 1;
4106     // For now disable use_source_sad since Last_Source will not be the previous
4107     // encoded but the dropped one.
4108     cpi->sf.use_source_sad = 0;
4109     cpi->rc.last_post_encode_dropped_scene_change = 0;
4110   }
4111   // Check if this high_source_sad (scene/slide change) frame should be
4112   // encoded at high/max QP, and if so, set the q and adjust some rate
4113   // control parameters.
4114   if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
4115       (cpi->rc.high_source_sad ||
4116        (cpi->use_svc && svc->high_source_sad_superframe))) {
4117     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
4118       vp9_set_quantizer(cpi, q);
4119       vp9_set_variance_partition_thresholds(cpi, q, 0);
4120     }
4121   }
4122
4123 #if !CONFIG_REALTIME_ONLY
4124   // Variance adaptive and in frame q adjustment experiments are mutually
4125   // exclusive.
4126   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
4127     vp9_vaq_frame_setup(cpi);
4128   } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
4129     vp9_360aq_frame_setup(cpi);
4130   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
4131     vp9_setup_in_frame_q_adj(cpi);
4132   } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
4133     // it may be pretty bad for rate-control,
4134     // and I should handle it somehow
4135     vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4136   } else {
4137 #endif
4138     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4139       vp9_cyclic_refresh_setup(cpi);
4140     } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
4141       apply_roi_map(cpi);
4142     }
4143 #if !CONFIG_REALTIME_ONLY
4144   }
4145 #endif
4146
4147   apply_active_map(cpi);
4148
4149   vp9_encode_frame(cpi);
4150
4151   // Check if we should re-encode this frame at high Q because of high
4152   // overshoot based on the encoded frame size. Only for frames where
4153   // high temporal-source SAD is detected.
4154   // For SVC: all spatial layers are checked for re-encoding.
4155   if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
4156       (cpi->rc.high_source_sad ||
4157        (cpi->use_svc && svc->high_source_sad_superframe))) {
4158     int frame_size = 0;
4159     // Get an estimate of the encoded frame size.
4160     save_coding_context(cpi);
4161     vp9_pack_bitstream(cpi, dest, size);
4162     restore_coding_context(cpi);
4163     frame_size = (int)(*size) << 3;
4164     // Check if encoded frame will overshoot too much, and if so, set the q and
4165     // adjust some rate control parameters, and return to re-encode the frame.
4166     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
4167       vpx_clear_system_state();
4168       vp9_set_quantizer(cpi, q);
4169       vp9_set_variance_partition_thresholds(cpi, q, 0);
4170       suppress_active_map(cpi);
4171       // Turn-off cyclic refresh for re-encoded frame.
4172       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4173         CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
4174         unsigned char *const seg_map = cpi->segmentation_map;
4175         memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
4176         memset(cr->last_coded_q_map, MAXQ,
4177                cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
4178         cr->sb_index = 0;
4179         vp9_disable_segmentation(&cm->seg);
4180       }
4181       apply_active_map(cpi);
4182       vp9_encode_frame(cpi);
4183     }
4184   }
4185
4186   // Update some stats from cyclic refresh, and check for golden frame update.
4187   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
4188       !frame_is_intra_only(cm))
4189     vp9_cyclic_refresh_postencode(cpi);
4190
4191   // Update the skip mb flag probabilities based on the distribution
4192   // seen in the last encoder iteration.
4193   // update_base_skip_probs(cpi);
4194   vpx_clear_system_state();
4195   return 1;
4196 }
4197
4198 #if !CONFIG_REALTIME_ONLY
#define MAX_QSTEP_ADJ 4
// Returns rate_excess / rate_limit rounded to nearest, capped at
// MAX_QSTEP_ADJ. A zero rate_limit yields MAX_QSTEP_ADJ.
//
// The rounding sum is formed in 64 bits: rate_excess + rate_limit / 2 can
// exceed INT_MAX for large frame sizes, and signed int overflow is undefined.
static int get_qstep_adj(int rate_excess, int rate_limit) {
  int64_t qstep;
  if (rate_limit == 0) return MAX_QSTEP_ADJ;
  qstep = ((int64_t)rate_excess + rate_limit / 2) / rate_limit;
  return (qstep < MAX_QSTEP_ADJ) ? (int)qstep : MAX_QSTEP_ADJ;
}
4205
4206 #if CONFIG_RATE_CTRL
// Capacity of the per-frame recode history arrays below.
#define RATE_CTRL_MAX_RECODE_NUM 7

// Record of the (q_index, bits) pairs tried so far while recoding one frame,
// plus the q_index bracket derived from them.
typedef struct RATE_QINDEX_HISTORY {
  // Number of valid entries in q_index_history / rate_history.
  int recode_count;
  // q_index used for each attempt, oldest first.
  int q_index_history[RATE_CTRL_MAX_RECODE_NUM];
  // Bits produced by each attempt, parallel to q_index_history.
  int rate_history[RATE_CTRL_MAX_RECODE_NUM];
  // Set to the most recent q_index whose rate was at or below the target.
  int q_index_high;
  // Set to the most recent q_index whose rate was at or above the target.
  int q_index_low;
} RATE_QINDEX_HISTORY;
4216
4217 static void init_rq_history(RATE_QINDEX_HISTORY *rq_history) {
4218   rq_history->recode_count = 0;
4219   rq_history->q_index_high = 255;
4220   rq_history->q_index_low = 0;
4221 }
4222
4223 static void update_rq_history(RATE_QINDEX_HISTORY *rq_history, int target_bits,
4224                               int actual_bits, int q_index) {
4225   rq_history->q_index_history[rq_history->recode_count] = q_index;
4226   rq_history->rate_history[rq_history->recode_count] = actual_bits;
4227   if (actual_bits <= target_bits) {
4228     rq_history->q_index_high = q_index;
4229   }
4230   if (actual_bits >= target_bits) {
4231     rq_history->q_index_low = q_index;
4232   }
4233   rq_history->recode_count += 1;
4234 }
4235
4236 static int guess_q_index_from_model(const RATE_QSTEP_MODEL *rq_model,
4237                                     int target_bits) {
4238   // The model predicts bits as follows.
4239   // target_bits = bias - ratio * log2(q_step)
4240   // Given the target_bits, we compute the q_step as follows.
4241   const double q_step =
4242       pow(2.0, (rq_model->bias - target_bits) / rq_model->ratio);
4243   // TODO(angiebird): Make this function support highbitdepth.
4244   return vp9_convert_q_to_qindex(q_step, VPX_BITS_8);
4245 }
4246
// Steps the previous q_index by `gap` towards the target rate: down when the
// last attempt undershot the target, up otherwise. The result is kept within
// [0, 255].
static int guess_q_index_linear(int prev_q_index, int target_bits,
                                int actual_bits, int gap) {
  if (actual_bits < target_bits) {
    const int lowered = prev_q_index - gap;
    return (lowered > 0) ? lowered : 0;
  } else {
    const int raised = prev_q_index + gap;
    return (raised < 255) ? raised : 255;
  }
}
4259
// Returns |target_bits - actual_bits| as a percentage of target_bits.
//
// The difference is taken in 64 bits so it cannot overflow, and the divisor
// is clamped to at least 1 so a zero target no longer produces inf/NaN.
static double get_bits_percent_diff(int target_bits, int actual_bits) {
  const int divisor = (target_bits < 1) ? 1 : target_bits;
  int64_t abs_diff = (int64_t)target_bits - actual_bits;
  if (abs_diff < 0) abs_diff = -abs_diff;
  return (double)abs_diff / divisor * 100;
}
4264
4265 static int rq_model_predict_q_index(const RATE_QSTEP_MODEL *rq_model,
4266                                     const RATE_QINDEX_HISTORY *rq_history,
4267                                     int target_bits) {
4268   int q_index = -1;
4269   if (rq_history->recode_count > 0) {
4270     const int actual_bits =
4271         rq_history->rate_history[rq_history->recode_count - 1];
4272     const int prev_q_index =
4273         rq_history->q_index_history[rq_history->recode_count - 1];
4274     const double percent_diff = get_bits_percent_diff(target_bits, actual_bits);
4275     if (percent_diff > 50) {
4276       // Binary search.
4277       // When the actual_bits and target_bits are far apart, binary search
4278       // q_index is faster.
4279       q_index = (rq_history->q_index_low + rq_history->q_index_high) / 2;
4280     } else {
4281       if (rq_model->ready) {
4282         q_index = guess_q_index_from_model(rq_model, target_bits);
4283       } else {
4284         // TODO(angiebird): Find a better way to set the gap.
4285         q_index =
4286             guess_q_index_linear(prev_q_index, target_bits, actual_bits, 20);
4287       }
4288     }
4289   } else {
4290     if (rq_model->ready) {
4291       q_index = guess_q_index_from_model(rq_model, target_bits);
4292     }
4293   }
4294
4295   assert(rq_history->q_index_low <= rq_history->q_index_high);
4296   if (q_index <= rq_history->q_index_low) {
4297     q_index = rq_history->q_index_low + 1;
4298   }
4299   if (q_index >= rq_history->q_index_high) {
4300     q_index = rq_history->q_index_high - 1;
4301   }
4302   return q_index;
4303 }
4304
4305 static void rq_model_update(const RATE_QINDEX_HISTORY *rq_history,
4306                             int target_bits, RATE_QSTEP_MODEL *rq_model) {
4307   const int recode_count = rq_history->recode_count;
4308   if (recode_count >= 2) {
4309     // Fit the ratio and bias of rq_model based on last two recode histories.
4310     const double s1 = vp9_convert_qindex_to_q(
4311         rq_history->q_index_history[recode_count - 2], VPX_BITS_8);
4312     const double s2 = vp9_convert_qindex_to_q(
4313         rq_history->q_index_history[recode_count - 1], VPX_BITS_8);
4314     const double r1 = rq_history->rate_history[recode_count - 2];
4315     const double r2 = rq_history->rate_history[recode_count - 1];
4316     rq_model->ratio = (r2 - r1) / (log2(s1) - log2(s2));
4317     rq_model->bias = r1 + (rq_model->ratio) * log2(s1);
4318     rq_model->ready = 1;
4319   } else if (recode_count == 1) {
4320     if (rq_model->ready) {
4321       // Update the ratio only when the initial model exists and we only have
4322       // one recode history.
4323       const int prev_q = rq_history->q_index_history[recode_count - 1];
4324       const double prev_q_step = vp9_convert_qindex_to_q(prev_q, VPX_BITS_8);
4325       const int actual_bits = rq_history->rate_history[recode_count - 1];
4326       rq_model->ratio =
4327           rq_model->ratio - (target_bits - actual_bits) / log2(prev_q_step);
4328     }
4329   }
4330 }
4331 #endif  // CONFIG_RATE_CTRL
4332
4333 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
4334                                     uint8_t *dest) {
4335   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4336   VP9_COMMON *const cm = &cpi->common;
4337   RATE_CONTROL *const rc = &cpi->rc;
4338   int bottom_index, top_index;
4339   int loop_count = 0;
4340   int loop_at_this_size = 0;
4341   int loop = 0;
4342   int overshoot_seen = 0;
4343   int undershoot_seen = 0;
4344   int frame_over_shoot_limit;
4345   int frame_under_shoot_limit;
4346   int q = 0, q_low = 0, q_high = 0;
4347   int enable_acl;
4348 #ifdef AGGRESSIVE_VBR
4349   int qrange_adj = 1;
4350 #endif
4351
4352 #if CONFIG_RATE_CTRL
4353   const FRAME_UPDATE_TYPE update_type =
4354       cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
4355   const ENCODE_FRAME_TYPE frame_type = get_encode_frame_type(update_type);
4356   RATE_QSTEP_MODEL *rq_model = &cpi->rq_model[frame_type];
4357   RATE_QINDEX_HISTORY rq_history;
4358   init_rq_history(&rq_history);
4359 #endif  // CONFIG_RATE_CTRL
4360
4361   if (cm->show_existing_frame) {
4362     rc->this_frame_target = 0;
4363     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
4364     return;
4365   }
4366
4367   set_size_independent_vars(cpi);
4368
4369   enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
4370                                        (cpi->twopass.gf_group.index == 1)
4371                                  : 0;
4372
4373   do {
4374     vpx_clear_system_state();
4375
4376     set_frame_size(cpi);
4377
4378     if (loop_count == 0 || cpi->resize_pending != 0) {
4379       set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4380
4381 #ifdef AGGRESSIVE_VBR
4382       if (two_pass_first_group_inter(cpi)) {
4383         // Adjustment limits for min and max q
4384         qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2);
4385
4386         bottom_index =
4387             VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q);
4388         top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2);
4389       }
4390 #endif
4391       // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
4392       set_mv_search_params(cpi);
4393
4394       // Reset the loop state for new frame size.
4395       overshoot_seen = 0;
4396       undershoot_seen = 0;
4397
4398       // Reconfiguration for change in frame size has concluded.
4399       cpi->resize_pending = 0;
4400
4401       q_low = bottom_index;
4402       q_high = top_index;
4403
4404       loop_at_this_size = 0;
4405     }
4406
4407 #if CONFIG_RATE_CTRL
4408     {
4409       const int suggested_q_index = rq_model_predict_q_index(
4410           rq_model, &rq_history, rc->this_frame_target);
4411       if (suggested_q_index != -1) {
4412         q = suggested_q_index;
4413       }
4414     }
4415 #endif  // CONFIG_RATE_CTRL
4416     // Decide frame size bounds first time through.
4417     if (loop_count == 0) {
4418       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
4419                                        &frame_under_shoot_limit,
4420                                        &frame_over_shoot_limit);
4421     }
4422
4423     cpi->Source =
4424         vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
4425                               (oxcf->pass == 0), EIGHTTAP, 0);
4426
4427     // Unfiltered raw source used in metrics calculation if the source
4428     // has been filtered.
4429     if (is_psnr_calc_enabled(cpi)) {
4430 #ifdef ENABLE_KF_DENOISE
4431       if (is_spatial_denoise_enabled(cpi)) {
4432         cpi->raw_source_frame = vp9_scale_if_required(
4433             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
4434             (oxcf->pass == 0), EIGHTTAP, 0);
4435       } else {
4436         cpi->raw_source_frame = cpi->Source;
4437       }
4438 #else
4439       cpi->raw_source_frame = cpi->Source;
4440 #endif
4441     }
4442
4443     if (cpi->unscaled_last_source != NULL)
4444       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
4445                                                &cpi->scaled_last_source,
4446                                                (oxcf->pass == 0), EIGHTTAP, 0);
4447
4448     if (frame_is_intra_only(cm) == 0) {
4449       if (loop_count > 0) {
4450         release_scaled_references(cpi);
4451       }
4452       vp9_scale_references(cpi);
4453     }
4454
4455 #if CONFIG_RATE_CTRL
4456     // TODO(angiebird): This is a hack for making sure the encoder use the
4457     // external_quantize_index exactly. Avoid this kind of hack later.
4458     if (cpi->encode_command.use_external_quantize_index) {
4459       q = cpi->encode_command.external_quantize_index;
4460     }
4461 #endif
4462
4463     vp9_set_quantizer(cpi, q);
4464
4465     if (loop_count == 0) setup_frame(cpi);
4466
4467     // Variance adaptive and in frame q adjustment experiments are mutually
4468     // exclusive.
4469     if (oxcf->aq_mode == VARIANCE_AQ) {
4470       vp9_vaq_frame_setup(cpi);
4471     } else if (oxcf->aq_mode == EQUATOR360_AQ) {
4472       vp9_360aq_frame_setup(cpi);
4473     } else if (oxcf->aq_mode == COMPLEXITY_AQ) {
4474       vp9_setup_in_frame_q_adj(cpi);
4475     } else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
4476       vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4477     } else if (oxcf->aq_mode == PSNR_AQ) {
4478       vp9_psnr_aq_mode_setup(&cm->seg);
4479     }
4480
4481     vp9_encode_frame(cpi);
4482
4483     // Update the skip mb flag probabilities based on the distribution
4484     // seen in the last encoder iteration.
4485     // update_base_skip_probs(cpi);
4486
4487     vpx_clear_system_state();
4488
4489     // Dummy pack of the bitstream using up to date stats to get an
4490     // accurate estimate of output frame size to determine if we need
4491     // to recode.
4492     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4493       save_coding_context(cpi);
4494       if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
4495
4496       rc->projected_frame_size = (int)(*size) << 3;
4497
4498       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
4499     }
4500
4501 #if CONFIG_RATE_CTRL
4502     // This part needs to be after save_coding_context() because
4503     // restore_coding_context will be called in the end of this function.
4504     // TODO(angiebird): This is a hack for making sure the encoder use the
4505     // external_quantize_index exactly. Avoid this kind of hack later.
4506     if (cpi->encode_command.use_external_quantize_index) {
4507       break;
4508     }
4509
4510     if (cpi->encode_command.use_external_target_frame_bits) {
4511       const double percent_diff = get_bits_percent_diff(
4512           rc->this_frame_target, rc->projected_frame_size);
4513       update_rq_history(&rq_history, rc->this_frame_target,
4514                         rc->projected_frame_size, q);
4515       loop_count += 1;
4516
4517       rq_model_update(&rq_history, rc->this_frame_target, rq_model);
4518
4519       // Check if we hit the target bitrate.
4520       if (percent_diff <= 15 ||
4521           rq_history.recode_count >= RATE_CTRL_MAX_RECODE_NUM ||
4522           rq_history.q_index_low >= rq_history.q_index_high) {
4523         break;
4524       }
4525
4526       loop = 1;
4527       restore_coding_context(cpi);
4528       continue;
4529     }
4530 #endif  // CONFIG_RATE_CTRL
4531
4532     if (oxcf->rc_mode == VPX_Q) {
4533       loop = 0;
4534     } else {
4535       if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
4536           (rc->projected_frame_size < rc->max_frame_bandwidth)) {
4537         int last_q = q;
4538         int64_t kf_err;
4539
4540         int64_t high_err_target = cpi->ambient_err;
4541         int64_t low_err_target = cpi->ambient_err >> 1;
4542
4543 #if CONFIG_VP9_HIGHBITDEPTH
4544         if (cm->use_highbitdepth) {
4545           kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4546         } else {
4547           kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4548         }
4549 #else
4550         kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4551 #endif  // CONFIG_VP9_HIGHBITDEPTH
4552
4553         // Prevent possible divide by zero error below for perfect KF
4554         kf_err += !kf_err;
4555
4556         // The key frame is not good enough or we can afford
4557         // to make it better without undue risk of popping.
4558         if ((kf_err > high_err_target &&
4559              rc->projected_frame_size <= frame_over_shoot_limit) ||
4560             (kf_err > low_err_target &&
4561              rc->projected_frame_size <= frame_under_shoot_limit)) {
4562           // Lower q_high
4563           q_high = q > q_low ? q - 1 : q_low;
4564
4565           // Adjust Q
4566           q = (int)((q * high_err_target) / kf_err);
4567           q = VPXMIN(q, (q_high + q_low) >> 1);
4568         } else if (kf_err < low_err_target &&
4569                    rc->projected_frame_size >= frame_under_shoot_limit) {
4570           // The key frame is much better than the previous frame
4571           // Raise q_low
4572           q_low = q < q_high ? q + 1 : q_high;
4573
4574           // Adjust Q
4575           q = (int)((q * low_err_target) / kf_err);
4576           q = VPXMIN(q, (q_high + q_low + 1) >> 1);
4577         }
4578
4579         // Clamp Q to upper and lower limits:
4580         q = clamp(q, q_low, q_high);
4581
4582         loop = q != last_q;
4583       } else if (recode_loop_test(cpi, frame_over_shoot_limit,
4584                                   frame_under_shoot_limit, q,
4585                                   VPXMAX(q_high, top_index), bottom_index)) {
4586         // Is the projected frame size out of range and are we allowed
4587         // to attempt to recode.
4588         int last_q = q;
4589         int retries = 0;
4590         int qstep;
4591
4592         if (cpi->resize_pending == 1) {
4593           // Change in frame size so go back around the recode loop.
4594           cpi->rc.frame_size_selector =
4595               SCALE_STEP1 - cpi->rc.frame_size_selector;
4596           cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
4597
4598 #if CONFIG_INTERNAL_STATS
4599           ++cpi->tot_recode_hits;
4600 #endif
4601           ++loop_count;
4602           loop = 1;
4603           continue;
4604         }
4605
4606         // Frame size out of permitted range:
4607         // Update correction factor & compute new Q to try...
4608
4609         // Frame is too large
4610         if (rc->projected_frame_size > rc->this_frame_target) {
4611           // Special case if the projected size is > the max allowed.
4612           if ((q == q_high) &&
4613               ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
4614                (!rc->is_src_frame_alt_ref &&
4615                 (rc->projected_frame_size >=
4616                  big_rate_miss_high_threshold(cpi))))) {
4617             int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
4618                                             big_rate_miss_high_threshold(cpi)));
4619             double q_val_high;
4620             q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
4621             q_val_high =
4622                 q_val_high * ((double)rc->projected_frame_size / max_rate);
4623             q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
4624             q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
4625           }
4626
4627           // Raise Qlow as to at least the current value
4628           qstep =
4629               get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
4630           q_low = VPXMIN(q + qstep, q_high);
4631
4632           if (undershoot_seen || loop_at_this_size > 1) {
4633             // Update rate_correction_factor unless
4634             vp9_rc_update_rate_correction_factors(cpi);
4635
4636             q = (q_high + q_low + 1) / 2;
4637           } else {
4638             // Update rate_correction_factor unless
4639             vp9_rc_update_rate_correction_factors(cpi);
4640
4641             q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4642                                   VPXMAX(q_high, top_index));
4643
4644             while (q < q_low && retries < 10) {
4645               vp9_rc_update_rate_correction_factors(cpi);
4646               q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4647                                     VPXMAX(q_high, top_index));
4648               retries++;
4649             }
4650           }
4651
4652           overshoot_seen = 1;
4653         } else {
4654           // Frame is too small
4655           qstep =
4656               get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
4657           q_high = VPXMAX(q - qstep, q_low);
4658
4659           if (overshoot_seen || loop_at_this_size > 1) {
4660             vp9_rc_update_rate_correction_factors(cpi);
4661             q = (q_high + q_low) / 2;
4662           } else {
4663             vp9_rc_update_rate_correction_factors(cpi);
4664             q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4665                                   VPXMIN(q_low, bottom_index), top_index);
4666             // Special case reset for qlow for constrained quality.
4667             // This should only trigger where there is very substantial
4668             // undershoot on a frame and the auto cq level is above
4669             // the user passed in value.
4670             if (oxcf->rc_mode == VPX_CQ && q < q_low) {
4671               q_low = q;
4672             }
4673
4674             while (q > q_high && retries < 10) {
4675               vp9_rc_update_rate_correction_factors(cpi);
4676               q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4677                                     VPXMIN(q_low, bottom_index), top_index);
4678               retries++;
4679             }
4680           }
4681           undershoot_seen = 1;
4682         }
4683
4684         // Clamp Q to upper and lower limits:
4685         q = clamp(q, q_low, q_high);
4686
4687         loop = (q != last_q);
4688       } else {
4689         loop = 0;
4690       }
4691     }
4692
4693     // Special case for overlay frame.
4694     if (rc->is_src_frame_alt_ref &&
4695         rc->projected_frame_size < rc->max_frame_bandwidth)
4696       loop = 0;
4697
4698     if (loop) {
4699       ++loop_count;
4700       ++loop_at_this_size;
4701
4702 #if CONFIG_INTERNAL_STATS
4703       ++cpi->tot_recode_hits;
4704 #endif
4705     }
4706
4707     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
4708       if (loop) restore_coding_context(cpi);
4709   } while (loop);
4710
4711 #ifdef AGGRESSIVE_VBR
4712   if (two_pass_first_group_inter(cpi)) {
4713     cpi->twopass.active_worst_quality =
4714         VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
4715   } else if (!frame_is_kf_gf_arf(cpi)) {
4716 #else
4717   if (!frame_is_kf_gf_arf(cpi)) {
4718 #endif
4719     // Have we been forced to adapt Q outside the expected range by an extreme
4720     // rate miss. If so adjust the active maxQ for the subsequent frames.
4721     if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
4722       cpi->twopass.active_worst_quality = q;
4723     } else if (oxcf->vbr_corpus_complexity && q == q_low &&
4724                rc->projected_frame_size < rc->this_frame_target) {
4725       cpi->twopass.active_worst_quality =
4726           VPXMAX(q, cpi->twopass.active_worst_quality - 1);
4727     }
4728   }
4729
4730   if (enable_acl) {
4731     // Skip recoding, if model diff is below threshold
4732     const int thresh = compute_context_model_thresh(cpi);
4733     const int diff = compute_context_model_diff(cm);
4734     if (diff >= thresh) {
4735       vp9_encode_frame(cpi);
4736     }
4737   }
4738   if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4739     vpx_clear_system_state();
4740     restore_coding_context(cpi);
4741   }
4742 }
4743 #endif  // !CONFIG_REALTIME_ONLY
4744
4745 static int get_ref_frame_flags(const VP9_COMP *cpi) {
4746   const int *const map = cpi->common.ref_frame_map;
4747   const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
4748   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
4749   const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
4750   int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
4751
4752   if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
4753
4754   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
4755       (cpi->svc.number_temporal_layers == 1 &&
4756        cpi->svc.number_spatial_layers == 1))
4757     flags &= ~VP9_GOLD_FLAG;
4758
4759   if (alt_is_last) flags &= ~VP9_ALT_FLAG;
4760
4761   if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
4762
4763   return flags;
4764 }
4765
4766 static void set_ext_overrides(VP9_COMP *cpi) {
4767   // Overrides the defaults with the externally supplied values with
4768   // vp9_update_reference() and vp9_update_entropy() calls
4769   // Note: The overrides are valid only for the next frame passed
4770   // to encode_frame_to_data_rate() function
4771   if (cpi->ext_refresh_frame_context_pending) {
4772     cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
4773     cpi->ext_refresh_frame_context_pending = 0;
4774   }
4775   if (cpi->ext_refresh_frame_flags_pending) {
4776     cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
4777     cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
4778     cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
4779   }
4780 }
4781
4782 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
4783     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4784     YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
4785     int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) {
4786   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4787       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4788 #if CONFIG_VP9_HIGHBITDEPTH
4789     if (cm->bit_depth == VPX_BITS_8) {
4790       vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4791                                  phase_scaler2);
4792       vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type,
4793                                  phase_scaler);
4794     } else {
4795       scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
4796                              filter_type2, phase_scaler2);
4797       scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
4798                              filter_type, phase_scaler);
4799     }
4800 #else
4801     vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4802                                phase_scaler2);
4803     vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler);
4804 #endif  // CONFIG_VP9_HIGHBITDEPTH
4805     return scaled;
4806   } else {
4807     return unscaled;
4808   }
4809 }
4810
4811 YV12_BUFFER_CONFIG *vp9_scale_if_required(
4812     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4813     int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) {
4814   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4815       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4816 #if CONFIG_VP9_HIGHBITDEPTH
4817     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4818         unscaled->y_height <= (scaled->y_height << 1))
4819       if (cm->bit_depth == VPX_BITS_8)
4820         vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4821       else
4822         scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
4823                                filter_type, phase_scaler);
4824     else
4825       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
4826 #else
4827     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4828         unscaled->y_height <= (scaled->y_height << 1))
4829       vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4830     else
4831       scale_and_extend_frame_nonnormative(unscaled, scaled);
4832 #endif  // CONFIG_VP9_HIGHBITDEPTH
4833     return scaled;
4834   } else {
4835     return unscaled;
4836   }
4837 }
4838
4839 static void set_ref_sign_bias(VP9_COMP *cpi) {
4840   VP9_COMMON *const cm = &cpi->common;
4841   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4842   const int cur_frame_index = ref_buffer->frame_index;
4843   MV_REFERENCE_FRAME ref_frame;
4844
4845   for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
4846     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
4847     const RefCntBuffer *const ref_cnt_buf =
4848         get_ref_cnt_buffer(&cpi->common, buf_idx);
4849     if (ref_cnt_buf) {
4850       cm->ref_frame_sign_bias[ref_frame] =
4851           cur_frame_index < ref_cnt_buf->frame_index;
4852     }
4853   }
4854 }
4855
4856 static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
4857   INTERP_FILTER ifilter;
4858   int ref_total[MAX_REF_FRAMES] = { 0 };
4859   MV_REFERENCE_FRAME ref;
4860   int mask = 0;
4861   if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
4862     return mask;
4863   for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
4864     for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
4865       ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
4866
4867   for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
4868     if ((ref_total[LAST_FRAME] &&
4869          cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
4870         (ref_total[GOLDEN_FRAME] == 0 ||
4871          cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
4872              ref_total[GOLDEN_FRAME]) &&
4873         (ref_total[ALTREF_FRAME] == 0 ||
4874          cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
4875              ref_total[ALTREF_FRAME]))
4876       mask |= 1 << ifilter;
4877   }
4878   return mask;
4879 }
4880
4881 #ifdef ENABLE_KF_DENOISE
// Baseline kernel weights for the spatial denoiser, stored row by row.
// Both kernels weight the centre sample 4 and its four direct neighbours 2.

// 3x3 kernel (weights sum to 16).
// clang-format off
static uint8_t dn_kernal_3[9] = {
  1, 2, 1,
  2, 4, 2,
  1, 2, 1
};

// 5x5 kernel (weights sum to 32).
static uint8_t dn_kernal_5[25] = {
  1, 1, 1, 1, 1,
  1, 1, 2, 1, 1,
  1, 2, 4, 2, 1,
  1, 1, 2, 1, 1,
  1, 1, 1, 1, 1
};
// clang-format on
4886
// Accumulates one neighbouring sample into a weighted sum, but only if it is
// close enough to the centre sample.
//   centre_val   - value of the sample being filtered.
//   data_val     - value of the neighbouring sample under consideration.
//   thresh       - maximum absolute difference from centre_val for data_val
//                  to be included.
//   point_weight - kernel weight of the neighbouring sample.
//   sum_val      - in/out: running sum of data_val * point_weight.
//   sum_weight   - in/out: running sum of the weights of included samples.
static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
                                     uint8_t point_weight, int *sum_val,
                                     int *sum_weight) {
  const int deviation = abs(centre_val - data_val);

  // Samples that differ too much from the centre are left out entirely.
  if (deviation > thresh) return;

  *sum_weight += point_weight;
  *sum_val += (int)data_val * (int)point_weight;
}
4895
4896 static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
4897                                   const int strength) {
4898   int sum_weight = 0;
4899   int sum_val = 0;
4900   int thresh = strength;
4901   int kernal_size = 5;
4902   int half_k_size = 2;
4903   int i, j;
4904   int max_diff = 0;
4905   uint8_t *tmp_ptr;
4906   uint8_t *kernal_ptr;
4907
4908   // Find the maximum deviation from the source point in the locale.
4909   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4910   for (i = 0; i < kernal_size + 2; ++i) {
4911     for (j = 0; j < kernal_size + 2; ++j) {
4912       max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
4913     }
4914     tmp_ptr += stride;
4915   }
4916
4917   // Select the kernel size.
4918   if (max_diff > (strength + (strength >> 1))) {
4919     kernal_size = 3;
4920     half_k_size = 1;
4921     thresh = thresh >> 1;
4922   }
4923   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4924
4925   // Apply the kernel
4926   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4927   for (i = 0; i < kernal_size; ++i) {
4928     for (j = 0; j < kernal_size; ++j) {
4929       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4930                         &sum_val, &sum_weight);
4931       ++kernal_ptr;
4932     }
4933     tmp_ptr += stride;
4934   }
4935
4936   // Update the source value with the new filtered value
4937   *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4938 }
4939
4940 #if CONFIG_VP9_HIGHBITDEPTH
4941 static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
4942                                          const int strength) {
4943   int sum_weight = 0;
4944   int sum_val = 0;
4945   int thresh = strength;
4946   int kernal_size = 5;
4947   int half_k_size = 2;
4948   int i, j;
4949   int max_diff = 0;
4950   uint16_t *tmp_ptr;
4951   uint8_t *kernal_ptr;
4952
4953   // Find the maximum deviation from the source point in the locale.
4954   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4955   for (i = 0; i < kernal_size + 2; ++i) {
4956     for (j = 0; j < kernal_size + 2; ++j) {
4957       max_diff = VPXMAX(max_diff, abs((int)src_ptr - (int)tmp_ptr[j]));
4958     }
4959     tmp_ptr += stride;
4960   }
4961
4962   // Select the kernel size.
4963   if (max_diff > (strength + (strength >> 1))) {
4964     kernal_size = 3;
4965     half_k_size = 1;
4966     thresh = thresh >> 1;
4967   }
4968   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4969
4970   // Apply the kernel
4971   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4972   for (i = 0; i < kernal_size; ++i) {
4973     for (j = 0; j < kernal_size; ++j) {
4974       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4975                         &sum_val, &sum_weight);
4976       ++kernal_ptr;
4977     }
4978     tmp_ptr += stride;
4979   }
4980
4981   // Update the source value with the new filtered value
4982   *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4983 }
4984 #endif  // CONFIG_VP9_HIGHBITDEPTH
4985
4986 // Apply thresholded spatial noise suppression to a given buffer.
4987 static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
4988                                    const int stride, const int width,
4989                                    const int height, const int strength) {
4990   VP9_COMMON *const cm = &cpi->common;
4991   uint8_t *src_ptr = buffer;
4992   int row;
4993   int col;
4994
4995   for (row = 0; row < height; ++row) {
4996     for (col = 0; col < width; ++col) {
4997 #if CONFIG_VP9_HIGHBITDEPTH
4998       if (cm->use_highbitdepth)
4999         highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]), stride,
5000                                      strength);
5001       else
5002         spatial_denoise_point(&src_ptr[col], stride, strength);
5003 #else
5004       spatial_denoise_point(&src_ptr[col], stride, strength);
5005 #endif  // CONFIG_VP9_HIGHBITDEPTH
5006     }
5007     src_ptr += stride;
5008   }
5009 }
5010
5011 // Apply thresholded spatial noise suppression to source.
5012 static void spatial_denoise_frame(VP9_COMP *cpi) {
5013   YV12_BUFFER_CONFIG *src = cpi->Source;
5014   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5015   TWO_PASS *const twopass = &cpi->twopass;
5016   VP9_COMMON *const cm = &cpi->common;
5017
5018   // Base the filter strength on the current active max Q.
5019   const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
5020                                               cm->bit_depth));
5021   int strength =
5022       VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));
5023
5024   // Denoise each of Y,U and V buffers.
5025   spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
5026                          src->y_height, strength);
5027
5028   strength += (strength >> 1);
5029   spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
5030                          src->uv_height, strength << 1);
5031
5032   spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
5033                          src->uv_height, strength << 1);
5034 }
5035 #endif  // ENABLE_KF_DENOISE
5036
5037 #if !CONFIG_REALTIME_ONLY
5038 static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
5039                                          uint8_t *dest) {
5040   if (cpi->common.seg.enabled)
5041     if (ALT_REF_AQ_PROTECT_GAIN) {
5042       size_t nsize = *size;
5043       int overhead;
5044
5045       // TODO(yuryg): optimize this, as
5046       // we don't really need to repack
5047
5048       save_coding_context(cpi);
5049       vp9_disable_segmentation(&cpi->common.seg);
5050       vp9_pack_bitstream(cpi, dest, &nsize);
5051       restore_coding_context(cpi);
5052
5053       overhead = (int)*size - (int)nsize;
5054
5055       if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
5056         vp9_encode_frame(cpi);
5057       else
5058         vp9_enable_segmentation(&cpi->common.seg);
5059     }
5060 }
5061 #endif
5062
5063 static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
5064   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
5065
5066   if (ref_buffer) {
5067     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5068     ref_buffer->frame_index =
5069         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
5070 #if CONFIG_RATE_CTRL
5071     ref_buffer->frame_coding_index = cm->current_frame_coding_index;
5072 #endif  // CONFIG_RATE_CTRL
5073   }
5074 }
5075
5076 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
5077   VP9_COMMON *cm = &cpi->common;
5078   ThreadData *td = &cpi->td;
5079   MACROBLOCK *x = &td->mb;
5080   MACROBLOCKD *xd = &x->e_mbd;
5081   uint8_t *y_buffer = cpi->Source->y_buffer;
5082   const int y_stride = cpi->Source->y_stride;
5083   const int block_size = BLOCK_16X16;
5084
5085   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
5086   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
5087   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
5088   const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
5089   double log_sum = 0.0;
5090   int row, col;
5091
5092   // Loop through each 64x64 block.
5093   for (row = 0; row < num_rows; ++row) {
5094     for (col = 0; col < num_cols; ++col) {
5095       int mi_row, mi_col;
5096       double var = 0.0, num_of_var = 0.0;
5097       const int index = row * num_cols + col;
5098
5099       for (mi_row = row * num_8x8_h;
5100            mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
5101         for (mi_col = col * num_8x8_w;
5102              mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w; ++mi_col) {
5103           struct buf_2d buf;
5104           const int row_offset_y = mi_row << 3;
5105           const int col_offset_y = mi_col << 3;
5106
5107           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
5108           buf.stride = y_stride;
5109
5110           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
5111           // and high bit videos, the variance needs to be divided by 2.0 or
5112           // 64.0 separately.
5113           // TODO(sdeng): need to tune for 12bit videos.
5114 #if CONFIG_VP9_HIGHBITDEPTH
5115           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
5116             var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
5117           else
5118 #endif
5119             var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
5120
5121           num_of_var += 1.0;
5122         }
5123       }
5124       var = var / num_of_var / 64.0;
5125
5126       // Curve fitting with an exponential model on all 16x16 blocks from the
5127       // Midres dataset.
5128       var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
5129       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
5130       log_sum += log(var);
5131     }
5132   }
5133   log_sum = exp(log_sum / (double)(num_rows * num_cols));
5134
5135   for (row = 0; row < num_rows; ++row) {
5136     for (col = 0; col < num_cols; ++col) {
5137       const int index = row * num_cols + col;
5138       cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
5139     }
5140   }
5141
5142   (void)xd;
5143 }
5144
// Process the wiener variance in 16x16 block basis.

// qsort() comparator ordering int elements ascending.
// Returns 1 if *elem1 > *elem2, -1 if *elem1 < *elem2, and 0 if equal.
// NOTE(review): both elements are read as int, but the caller in this file
// sorts an array of tran_low_t with sizeof(*coeff) as the element size.
// Confirm tran_low_t is int-sized in every build configuration.
static int qsort_comp(const void *elem1, const void *elem2) {
  const int lhs = *(const int *)elem1;
  const int rhs = *(const int *)elem2;
  return (lhs > rhs) - (lhs < rhs);
}
5153
5154 static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
5155   VP9_COMMON *cm = &cpi->common;
5156
5157   if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows &&
5158       cpi->mb_wiener_var_cols >= cm->mb_cols)
5159     return;
5160
5161   vpx_free(cpi->mb_wiener_variance);
5162   cpi->mb_wiener_variance = NULL;
5163
5164   CHECK_MEM_ERROR(
5165       cm, cpi->mb_wiener_variance,
5166       vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
5167   cpi->mb_wiener_var_rows = cm->mb_rows;
5168   cpi->mb_wiener_var_cols = cm->mb_cols;
5169 }
5170
5171 static void set_mb_wiener_variance(VP9_COMP *cpi) {
5172   VP9_COMMON *cm = &cpi->common;
5173   uint8_t *buffer = cpi->Source->y_buffer;
5174   int buf_stride = cpi->Source->y_stride;
5175
5176 #if CONFIG_VP9_HIGHBITDEPTH
5177   ThreadData *td = &cpi->td;
5178   MACROBLOCK *x = &td->mb;
5179   MACROBLOCKD *xd = &x->e_mbd;
5180   DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
5181   DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
5182   uint8_t *zero_pred;
5183 #else
5184   DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
5185 #endif
5186
5187   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
5188   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
5189
5190   int mb_row, mb_col, count = 0;
5191   // Hard coded operating block size
5192   const int block_size = 16;
5193   const int coeff_count = block_size * block_size;
5194   const TX_SIZE tx_size = TX_16X16;
5195
5196 #if CONFIG_VP9_HIGHBITDEPTH
5197   xd->cur_buf = cpi->Source;
5198   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5199     zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
5200     memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);
5201   } else {
5202     zero_pred = zero_pred8;
5203     memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);
5204   }
5205 #else
5206   memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
5207 #endif
5208
5209   cpi->norm_wiener_variance = 0;
5210
5211   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
5212     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
5213       int idx;
5214       int16_t median_val = 0;
5215       uint8_t *mb_buffer =
5216           buffer + mb_row * block_size * buf_stride + mb_col * block_size;
5217       int64_t wiener_variance = 0;
5218
5219 #if CONFIG_VP9_HIGHBITDEPTH
5220       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5221         vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
5222                                   mb_buffer, buf_stride, zero_pred, block_size,
5223                                   xd->bd);
5224         highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5225       } else {
5226         vpx_subtract_block(block_size, block_size, src_diff, block_size,
5227                            mb_buffer, buf_stride, zero_pred, block_size);
5228         wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5229       }
5230 #else
5231       vpx_subtract_block(block_size, block_size, src_diff, block_size,
5232                          mb_buffer, buf_stride, zero_pred, block_size);
5233       wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
5234 #endif  // CONFIG_VP9_HIGHBITDEPTH
5235
5236       coeff[0] = 0;
5237       for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]);
5238
5239       qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp);
5240
5241       // Noise level estimation
5242       median_val = coeff[coeff_count / 2];
5243
5244       // Wiener filter
5245       for (idx = 1; idx < coeff_count; ++idx) {
5246         int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
5247         int64_t tmp_coeff = (int64_t)coeff[idx];
5248         if (median_val) {
5249           tmp_coeff = (sqr_coeff * coeff[idx]) /
5250                       (sqr_coeff + (int64_t)median_val * median_val);
5251         }
5252         wiener_variance += tmp_coeff * tmp_coeff;
5253       }
5254       cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
5255           wiener_variance / coeff_count;
5256       cpi->norm_wiener_variance +=
5257           cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col];
5258       ++count;
5259     }
5260   }
5261
5262   if (count) cpi->norm_wiener_variance /= count;
5263   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
5264 }
5265
5266 #if !CONFIG_REALTIME_ONLY
5267 static void update_encode_frame_result(
5268     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
5269     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
5270     RefCntBuffer *ref_frame_buf[MAX_INTER_REF_FRAMES], int quantize_index,
5271     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
5272 #if CONFIG_RATE_CTRL
5273     const PARTITION_INFO *partition_info,
5274     const MOTION_VECTOR_INFO *motion_vector_info,
5275 #endif  // CONFIG_RATE_CTRL
5276     ENCODE_FRAME_RESULT *encode_frame_result);
5277 #endif  // !CONFIG_REALTIME_ONLY
5278
5279 static void encode_frame_to_data_rate(
5280     VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags,
5281     ENCODE_FRAME_RESULT *encode_frame_result) {
5282   VP9_COMMON *const cm = &cpi->common;
5283   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
5284   struct segmentation *const seg = &cm->seg;
5285   TX_SIZE t;
5286
5287   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
5288   // No need to set svc.skip_enhancement_layer if whole superframe will be
5289   // dropped.
5290   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
5291       cpi->oxcf.target_bandwidth == 0 &&
5292       !(cpi->svc.framedrop_mode != LAYER_DROP &&
5293         (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
5294          cpi->svc
5295              .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
5296                                                 1]) &&
5297         cpi->svc.drop_spatial_layer[0])) {
5298     cpi->svc.skip_enhancement_layer = 1;
5299     vp9_rc_postencode_update_drop_frame(cpi);
5300     cpi->ext_refresh_frame_flags_pending = 0;
5301     cpi->last_frame_dropped = 1;
5302     cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
5303     cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
5304     vp9_inc_frame_in_layer(cpi);
5305     return;
5306   }
5307
5308   set_ext_overrides(cpi);
5309   vpx_clear_system_state();
5310
5311 #ifdef ENABLE_KF_DENOISE
5312   // Spatial denoise of key frame.
5313   if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
5314 #endif
5315
5316   if (cm->show_existing_frame == 0) {
5317     // Update frame index
5318     set_frame_index(cpi, cm);
5319
5320     // Set the arf sign bias for this frame.
5321     set_ref_sign_bias(cpi);
5322   }
5323
5324   // Set default state for segment based loop filter update flags.
5325   cm->lf.mode_ref_delta_update = 0;
5326
5327   if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
5328     cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);
5329
5330   // Set various flags etc to special state if it is a key frame.
5331   if (frame_is_intra_only(cm)) {
5332     // Reset the loop filter deltas and segmentation map.
5333     vp9_reset_segment_features(&cm->seg);
5334
5335     // If segmentation is enabled force a map update for key frames.
5336     if (seg->enabled) {
5337       seg->update_map = 1;
5338       seg->update_data = 1;
5339     }
5340
5341     // The alternate reference frame cannot be active for a key frame.
5342     cpi->rc.source_alt_ref_active = 0;
5343
5344     cm->error_resilient_mode = oxcf->error_resilient_mode;
5345     cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
5346
5347     // By default, encoder assumes decoder can use prev_mi.
5348     if (cm->error_resilient_mode) {
5349       cm->frame_parallel_decoding_mode = 1;
5350       cm->reset_frame_context = 0;
5351       cm->refresh_frame_context = 0;
5352     } else if (cm->intra_only) {
5353       // Only reset the current context.
5354       cm->reset_frame_context = 2;
5355     }
5356   }
5357
5358   if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);
5359
5360   if (oxcf->aq_mode == PERCEPTUAL_AQ) {
5361     init_mb_wiener_var_buffer(cpi);
5362     set_mb_wiener_variance(cpi);
5363   }
5364
5365   vpx_clear_system_state();
5366
5367 #if CONFIG_INTERNAL_STATS
5368   memset(cpi->mode_chosen_counts, 0,
5369          MAX_MODES * sizeof(*cpi->mode_chosen_counts));
5370 #endif
5371 #if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5372   // Backup to ensure consistency between recodes
5373   save_encode_params(cpi);
5374 #endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
5375
5376   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
5377     if (!encode_without_recode_loop(cpi, size, dest)) return;
5378   } else {
5379 #if !CONFIG_REALTIME_ONLY
5380     encode_with_recode_loop(cpi, size, dest);
5381 #endif
5382   }
5383
5384   // TODO(jingning): When using show existing frame mode, we assume that the
5385   // current ARF will be directly used as the final reconstructed frame. This is
5386   // an encoder control scheme. One could in principle explore other
5387   // possibilities to arrange the reference frame buffer and their coding order.
5388   if (cm->show_existing_frame) {
5389     ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
5390                cm->ref_frame_map[cpi->alt_fb_idx]);
5391   }
5392
5393 #if !CONFIG_REALTIME_ONLY
5394   // Disable segmentation if it decrease rate/distortion ratio
5395   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5396     vp9_try_disable_lookahead_aq(cpi, size, dest);
5397 #endif
5398
5399 #if CONFIG_VP9_TEMPORAL_DENOISING
5400 #ifdef OUTPUT_YUV_DENOISED
5401   if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
5402     vpx_write_yuv_frame(yuv_denoised_file,
5403                         &cpi->denoiser.running_avg_y[INTRA_FRAME]);
5404   }
5405 #endif
5406 #endif
5407 #ifdef OUTPUT_YUV_SKINMAP
5408   if (cpi->common.current_video_frame > 1) {
5409     vp9_output_skin_map(cpi, yuv_skinmap_file);
5410   }
5411 #endif
5412
5413   // Special case code to reduce pulsing when key frames are forced at a
5414   // fixed interval. Note the reconstruction error if it is the frame before
5415   // the force key frame
5416   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
5417 #if CONFIG_VP9_HIGHBITDEPTH
5418     if (cm->use_highbitdepth) {
5419       cpi->ambient_err =
5420           vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5421     } else {
5422       cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5423     }
5424 #else
5425     cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5426 #endif  // CONFIG_VP9_HIGHBITDEPTH
5427   }
5428
5429   // If the encoder forced a KEY_FRAME decision
5430   if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;
5431
5432   cm->frame_to_show = get_frame_new_buffer(cm);
5433   cm->frame_to_show->color_space = cm->color_space;
5434   cm->frame_to_show->color_range = cm->color_range;
5435   cm->frame_to_show->render_width = cm->render_width;
5436   cm->frame_to_show->render_height = cm->render_height;
5437
5438   // Pick the loop filter level for the frame.
5439   loopfilter_frame(cpi, cm);
5440
5441   if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
5442
5443   // build the bitstream
5444   vp9_pack_bitstream(cpi, dest, size);
5445
5446 #if CONFIG_REALTIME_ONLY
5447   (void)encode_frame_result;
5448   assert(encode_frame_result == NULL);
5449 #else  // CONFIG_REALTIME_ONLY
5450   if (encode_frame_result != NULL) {
5451     const int ref_frame_flags = get_ref_frame_flags(cpi);
5452     const RefCntBuffer *coded_frame_buf =
5453         get_ref_cnt_buffer(cm, cm->new_fb_idx);
5454     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
5455     get_ref_frame_bufs(cpi, ref_frame_bufs);
5456     // update_encode_frame_result() depends on twopass.gf_group.index and
5457     // cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and
5458     // cpi->alt_fb_idx are updated for current frame and have
5459     // not been updated for the next frame yet.
5460     // The update locations are as follows.
5461     // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
5462     // for the first frame in the gf_group and is updated for the next frame at
5463     // vp9_twopass_postencode_update().
5464     // 2) cpi->Source is updated at the beginning of vp9_get_compressed_data()
5465     // 3) cm->new_fb_idx is updated at the beginning of
5466     // vp9_get_compressed_data() by get_free_fb(cm).
5467     // 4) cpi->lst_fb_idx/gld_fb_idx/alt_fb_idx will be updated for the next
5468     // frame at vp9_update_reference_frames().
5469     // This function needs to be called before vp9_update_reference_frames().
5470     // TODO(angiebird): Improve the codebase to make the update of frame
5471     // dependent variables more robust.
5472     update_encode_frame_result(
5473         ref_frame_flags,
5474         cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
5475         cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),
5476         cpi->oxcf.input_bit_depth, cm->bit_depth, cpi->td.counts,
5477 #if CONFIG_RATE_CTRL
5478         cpi->partition_info, cpi->motion_vector_info,
5479 #endif  // CONFIG_RATE_CTRL
5480         encode_frame_result);
5481   }
5482 #endif  // CONFIG_REALTIME_ONLY
5483
5484   if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
5485       cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
5486     restore_coding_context(cpi);
5487     return;
5488   }
5489
5490   cpi->last_frame_dropped = 0;
5491   cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
5492   if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
5493     cpi->svc.num_encoded_top_layer++;
5494
5495   // Keep track of the frame buffer index updated/refreshed for the
5496   // current encoded TL0 superframe.
5497   if (cpi->svc.temporal_layer_id == 0) {
5498     if (cpi->refresh_last_frame)
5499       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
5500     else if (cpi->refresh_golden_frame)
5501       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
5502     else if (cpi->refresh_alt_ref_frame)
5503       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
5504   }
5505
5506   if (cm->seg.update_map) update_reference_segmentation_map(cpi);
5507
5508   if (frame_is_intra_only(cm) == 0) {
5509     release_scaled_references(cpi);
5510   }
5511   vp9_update_reference_frames(cpi);
5512
5513   if (!cm->show_existing_frame) {
5514     for (t = TX_4X4; t <= TX_32X32; ++t) {
5515       full_to_model_counts(cpi->td.counts->coef[t],
5516                            cpi->td.rd_counts.coef_counts[t]);
5517     }
5518
5519     if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
5520       if (!frame_is_intra_only(cm)) {
5521         vp9_adapt_mode_probs(cm);
5522         vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
5523       }
5524       vp9_adapt_coef_probs(cm);
5525     }
5526   }
5527
5528   cpi->ext_refresh_frame_flags_pending = 0;
5529
5530   if (cpi->refresh_golden_frame == 1)
5531     cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
5532   else
5533     cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
5534
5535   if (cpi->refresh_alt_ref_frame == 1)
5536     cpi->frame_flags |= FRAMEFLAGS_ALTREF;
5537   else
5538     cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
5539
5540   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
5541
5542   cm->last_frame_type = cm->frame_type;
5543
5544   vp9_rc_postencode_update(cpi, *size);
5545
5546   if (oxcf->pass == 0 && !frame_is_intra_only(cm) &&
5547       (!cpi->use_svc ||
5548        (cpi->use_svc &&
5549         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
5550         cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) {
5551     vp9_compute_frame_low_motion(cpi);
5552   }
5553
5554   *size = VPXMAX(1, *size);
5555
5556 #if 0
5557   output_frame_level_debug_stats(cpi);
5558 #endif
5559
5560   if (cm->frame_type == KEY_FRAME) {
5561     // Tell the caller that the frame was coded as a key frame
5562     *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
5563   } else {
5564     *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
5565   }
5566
5567   // Clear the one shot update flags for segmentation map and mode/ref loop
5568   // filter deltas.
5569   cm->seg.update_map = 0;
5570   cm->seg.update_data = 0;
5571   cm->lf.mode_ref_delta_update = 0;
5572
5573   // keep track of the last coded dimensions
5574   cm->last_width = cm->width;
5575   cm->last_height = cm->height;
5576
5577   // reset to normal state now that we are done.
5578   if (!cm->show_existing_frame) {
5579     cm->last_show_frame = cm->show_frame;
5580     cm->prev_frame = cm->cur_frame;
5581   }
5582
5583   if (cm->show_frame) {
5584     vp9_swap_mi_and_prev_mi(cm);
5585     if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
5586   }
5587   update_frame_indexes(cm, cm->show_frame);
5588
5589   if (cpi->use_svc) {
5590     cpi->svc
5591         .layer_context[cpi->svc.spatial_layer_id *
5592                            cpi->svc.number_temporal_layers +
5593                        cpi->svc.temporal_layer_id]
5594         .last_frame_type = cm->frame_type;
5595     // Reset layer_sync back to 0 for next frame.
5596     cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
5597   }
5598
5599   cpi->force_update_segmentation = 0;
5600
5601 #if !CONFIG_REALTIME_ONLY
5602   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5603     vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
5604 #endif
5605
5606   cpi->svc.previous_frame_is_intra_only = cm->intra_only;
5607   cpi->svc.set_intra_only_frame = 0;
5608 }
5609
5610 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5611                       unsigned int *frame_flags) {
5612   vp9_rc_get_svc_params(cpi);
5613   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5614                             /*encode_frame_result = */ NULL);
5615 }
5616
5617 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5618                         unsigned int *frame_flags) {
5619   if (cpi->oxcf.rc_mode == VPX_CBR) {
5620     vp9_rc_get_one_pass_cbr_params(cpi);
5621   } else {
5622     vp9_rc_get_one_pass_vbr_params(cpi);
5623   }
5624   encode_frame_to_data_rate(cpi, size, dest, frame_flags,
5625                             /*encode_frame_result = */ NULL);
5626 }
5627
5628 #if !CONFIG_REALTIME_ONLY
5629 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5630                         unsigned int *frame_flags,
5631                         ENCODE_FRAME_RESULT *encode_frame_result) {
5632   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
5633 #if CONFIG_MISMATCH_DEBUG
5634   mismatch_move_frame_idx_w();
5635 #endif
5636   encode_frame_to_data_rate(cpi, size, dest, frame_flags, encode_frame_result);
5637 }
5638 #endif  // !CONFIG_REALTIME_ONLY
5639
5640 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
5641                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
5642                           int64_t end_time) {
5643   VP9_COMMON *const cm = &cpi->common;
5644   struct vpx_usec_timer timer;
5645   int res = 0;
5646   const int subsampling_x = sd->subsampling_x;
5647   const int subsampling_y = sd->subsampling_y;
5648 #if CONFIG_VP9_HIGHBITDEPTH
5649   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
5650 #else
5651   const int use_highbitdepth = 0;
5652 #endif
5653
5654   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
5655 #if CONFIG_VP9_TEMPORAL_DENOISING
5656   setup_denoiser_buffer(cpi);
5657 #endif
5658
5659   alloc_raw_frame_buffers(cpi);
5660
5661   vpx_usec_timer_start(&timer);
5662
5663   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
5664                          use_highbitdepth, frame_flags))
5665     res = -1;
5666   vpx_usec_timer_mark(&timer);
5667   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
5668
5669   if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
5670       (subsampling_x != 1 || subsampling_y != 1)) {
5671     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5672                        "Non-4:2:0 color format requires profile 1 or 3");
5673     res = -1;
5674   }
5675   if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
5676       (subsampling_x == 1 && subsampling_y == 1)) {
5677     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5678                        "4:2:0 color format requires profile 0 or 2");
5679     res = -1;
5680   }
5681
5682   return res;
5683 }
5684
5685 static int frame_is_reference(const VP9_COMP *cpi) {
5686   const VP9_COMMON *cm = &cpi->common;
5687
5688   return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
5689          cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame ||
5690          cm->refresh_frame_context || cm->lf.mode_ref_delta_update ||
5691          cm->seg.update_map || cm->seg.update_data;
5692 }
5693
5694 static void adjust_frame_rate(VP9_COMP *cpi,
5695                               const struct lookahead_entry *source) {
5696   int64_t this_duration;
5697   int step = 0;
5698
5699   if (source->ts_start == cpi->first_time_stamp_ever) {
5700     this_duration = source->ts_end - source->ts_start;
5701     step = 1;
5702   } else {
5703     int64_t last_duration =
5704         cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
5705
5706     this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
5707
5708     // do a step update if the duration changes by 10%
5709     if (last_duration)
5710       step = (int)((this_duration - last_duration) * 10 / last_duration);
5711   }
5712
5713   if (this_duration) {
5714     if (step) {
5715       vp9_new_framerate(cpi, 10000000.0 / this_duration);
5716     } else {
5717       // Average this frame's rate into the last second's average
5718       // frame rate. If we haven't seen 1 second yet, then average
5719       // over the whole interval seen.
5720       const double interval = VPXMIN(
5721           (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
5722       double avg_duration = 10000000.0 / cpi->framerate;
5723       avg_duration *= (interval - avg_duration + this_duration);
5724       avg_duration /= interval;
5725
5726       vp9_new_framerate(cpi, 10000000.0 / avg_duration);
5727     }
5728   }
5729   cpi->last_time_stamp_seen = source->ts_start;
5730   cpi->last_end_time_stamp_seen = source->ts_end;
5731 }
5732
5733 // Returns 0 if this is not an alt ref else the offset of the source frame
5734 // used as the arf midpoint.
5735 static int get_arf_src_index(VP9_COMP *cpi) {
5736   RATE_CONTROL *const rc = &cpi->rc;
5737   int arf_src_index = 0;
5738   if (is_altref_enabled(cpi)) {
5739     if (cpi->oxcf.pass == 2) {
5740       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5741       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
5742         arf_src_index = gf_group->arf_src_offset[gf_group->index];
5743       }
5744     } else if (rc->source_alt_ref_pending) {
5745       arf_src_index = rc->frames_till_gf_update_due;
5746     }
5747   }
5748   return arf_src_index;
5749 }
5750
5751 static void check_src_altref(VP9_COMP *cpi,
5752                              const struct lookahead_entry *source) {
5753   RATE_CONTROL *const rc = &cpi->rc;
5754
5755   if (cpi->oxcf.pass == 2) {
5756     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5757     rc->is_src_frame_alt_ref =
5758         (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
5759   } else {
5760     rc->is_src_frame_alt_ref =
5761         cpi->alt_ref_source && (source == cpi->alt_ref_source);
5762   }
5763
5764   if (rc->is_src_frame_alt_ref) {
5765     // Current frame is an ARF overlay frame.
5766     cpi->alt_ref_source = NULL;
5767
5768     // Don't refresh the last buffer for an ARF overlay frame. It will
5769     // become the GF so preserve last as an alternative prediction option.
5770     cpi->refresh_last_frame = 0;
5771   }
5772 }
5773
5774 #if CONFIG_INTERNAL_STATS
5775 static void adjust_image_stat(double y, double u, double v, double all,
5776                               ImageStat *s) {
5777   s->stat[Y] += y;
5778   s->stat[U] += u;
5779   s->stat[V] += v;
5780   s->stat[ALL] += all;
5781   s->worst = VPXMIN(s->worst, all);
5782 }
5783 #endif  // CONFIG_INTERNAL_STATS
5784
5785 // Adjust the maximum allowable frame size for the target level.
5786 static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) {
5787   RATE_CONTROL *const rc = &cpi->rc;
5788   LevelConstraint *const ls = &cpi->level_constraint;
5789   VP9_COMMON *const cm = &cpi->common;
5790   const double max_cpb_size = ls->max_cpb_size;
5791   vpx_clear_system_state();
5792   rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size);
5793   if (frame_is_intra_only(cm)) {
5794     rc->max_frame_bandwidth =
5795         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5));
5796   } else if (arf_src_index > 0) {
5797     rc->max_frame_bandwidth =
5798         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4));
5799   } else {
5800     rc->max_frame_bandwidth =
5801         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2));
5802   }
5803 }
5804
5805 static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
5806   VP9_COMMON *const cm = &cpi->common;
5807   Vp9LevelInfo *const level_info = &cpi->level_info;
5808   Vp9LevelSpec *const level_spec = &level_info->level_spec;
5809   Vp9LevelStats *const level_stats = &level_info->level_stats;
5810   int i, idx;
5811   uint64_t luma_samples, dur_end;
5812   const uint32_t luma_pic_size = cm->width * cm->height;
5813   const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
5814   LevelConstraint *const level_constraint = &cpi->level_constraint;
5815   const int8_t level_index = level_constraint->level_index;
5816   double cpb_data_size;
5817
5818   vpx_clear_system_state();
5819
5820   // update level_stats
5821   level_stats->total_compressed_size += *size;
5822   if (cm->show_frame) {
5823     level_stats->total_uncompressed_size +=
5824         luma_pic_size +
5825         2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
5826     level_stats->time_encoded =
5827         (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
5828         (double)TICKS_PER_SEC;
5829   }
5830
5831   if (arf_src_index > 0) {
5832     if (!level_stats->seen_first_altref) {
5833       level_stats->seen_first_altref = 1;
5834     } else if (level_stats->frames_since_last_altref <
5835                level_spec->min_altref_distance) {
5836       level_spec->min_altref_distance = level_stats->frames_since_last_altref;
5837     }
5838     level_stats->frames_since_last_altref = 0;
5839   } else {
5840     ++level_stats->frames_since_last_altref;
5841   }
5842
5843   if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
5844     idx = (level_stats->frame_window_buffer.start +
5845            level_stats->frame_window_buffer.len++) %
5846           FRAME_WINDOW_SIZE;
5847   } else {
5848     idx = level_stats->frame_window_buffer.start;
5849     level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
5850   }
5851   level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
5852   level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
5853   level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
5854
5855   if (cm->frame_type == KEY_FRAME) {
5856     level_stats->ref_refresh_map = 0;
5857   } else {
5858     int count = 0;
5859     level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
5860     // Also need to consider the case where the encoder refers to a buffer
5861     // that has been implicitly refreshed after encoding a keyframe.
5862     if (!cm->intra_only) {
5863       level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
5864       level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
5865       level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
5866     }
5867     for (i = 0; i < REF_FRAMES; ++i) {
5868       count += (level_stats->ref_refresh_map >> i) & 1;
5869     }
5870     if (count > level_spec->max_ref_frame_buffers) {
5871       level_spec->max_ref_frame_buffers = count;
5872     }
5873   }
5874
5875   // update average_bitrate
5876   level_spec->average_bitrate = (double)level_stats->total_compressed_size /
5877                                 125.0 / level_stats->time_encoded;
5878
5879   // update max_luma_sample_rate
5880   luma_samples = 0;
5881   for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
5882     idx = (level_stats->frame_window_buffer.start +
5883            level_stats->frame_window_buffer.len - 1 - i) %
5884           FRAME_WINDOW_SIZE;
5885     if (i == 0) {
5886       dur_end = level_stats->frame_window_buffer.buf[idx].ts;
5887     }
5888     if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
5889         TICKS_PER_SEC) {
5890       break;
5891     }
5892     luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
5893   }
5894   if (luma_samples > level_spec->max_luma_sample_rate) {
5895     level_spec->max_luma_sample_rate = luma_samples;
5896   }
5897
5898   // update max_cpb_size
5899   cpb_data_size = 0;
5900   for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
5901     if (i >= level_stats->frame_window_buffer.len) break;
5902     idx = (level_stats->frame_window_buffer.start +
5903            level_stats->frame_window_buffer.len - 1 - i) %
5904           FRAME_WINDOW_SIZE;
5905     cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5906   }
5907   cpb_data_size = cpb_data_size / 125.0;
5908   if (cpb_data_size > level_spec->max_cpb_size) {
5909     level_spec->max_cpb_size = cpb_data_size;
5910   }
5911
5912   // update max_luma_picture_size
5913   if (luma_pic_size > level_spec->max_luma_picture_size) {
5914     level_spec->max_luma_picture_size = luma_pic_size;
5915   }
5916
5917   // update max_luma_picture_breadth
5918   if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
5919     level_spec->max_luma_picture_breadth = luma_pic_breadth;
5920   }
5921
5922   // update compression_ratio
5923   level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
5924                                   cm->bit_depth /
5925                                   level_stats->total_compressed_size / 8.0;
5926
5927   // update max_col_tiles
5928   if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
5929     level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
5930   }
5931
5932   if (level_index >= 0 && level_constraint->fail_flag == 0) {
5933     if (level_spec->max_luma_picture_size >
5934         vp9_level_defs[level_index].max_luma_picture_size) {
5935       level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
5936       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5937                          "Failed to encode to the target level %d. %s",
5938                          vp9_level_defs[level_index].level,
5939                          level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
5940     }
5941
5942     if (level_spec->max_luma_picture_breadth >
5943         vp9_level_defs[level_index].max_luma_picture_breadth) {
5944       level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
5945       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5946                          "Failed to encode to the target level %d. %s",
5947                          vp9_level_defs[level_index].level,
5948                          level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
5949     }
5950
5951     if ((double)level_spec->max_luma_sample_rate >
5952         (double)vp9_level_defs[level_index].max_luma_sample_rate *
5953             (1 + SAMPLE_RATE_GRACE_P)) {
5954       level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
5955       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5956                          "Failed to encode to the target level %d. %s",
5957                          vp9_level_defs[level_index].level,
5958                          level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
5959     }
5960
5961     if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
5962       level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
5963       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5964                          "Failed to encode to the target level %d. %s",
5965                          vp9_level_defs[level_index].level,
5966                          level_fail_messages[TOO_MANY_COLUMN_TILE]);
5967     }
5968
5969     if (level_spec->min_altref_distance <
5970         vp9_level_defs[level_index].min_altref_distance) {
5971       level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
5972       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5973                          "Failed to encode to the target level %d. %s",
5974                          vp9_level_defs[level_index].level,
5975                          level_fail_messages[ALTREF_DIST_TOO_SMALL]);
5976     }
5977
5978     if (level_spec->max_ref_frame_buffers >
5979         vp9_level_defs[level_index].max_ref_frame_buffers) {
5980       level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
5981       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5982                          "Failed to encode to the target level %d. %s",
5983                          vp9_level_defs[level_index].level,
5984                          level_fail_messages[TOO_MANY_REF_BUFFER]);
5985     }
5986
5987     if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
5988       level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
5989       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5990                          "Failed to encode to the target level %d. %s",
5991                          vp9_level_defs[level_index].level,
5992                          level_fail_messages[CPB_TOO_LARGE]);
5993     }
5994
5995     // Set an upper bound for the next frame size. It will be used in
5996     // level_rc_framerate() before encoding the next frame.
5997     cpb_data_size = 0;
5998     for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
5999       if (i >= level_stats->frame_window_buffer.len) break;
6000       idx = (level_stats->frame_window_buffer.start +
6001              level_stats->frame_window_buffer.len - 1 - i) %
6002             FRAME_WINDOW_SIZE;
6003       cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
6004     }
6005     cpb_data_size = cpb_data_size / 125.0;
6006     level_constraint->max_frame_size =
6007         (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
6008               1000.0);
6009     if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
6010       level_constraint->max_frame_size >>= 1;
6011   }
6012 }
6013
6014 typedef struct GF_PICTURE {
6015   YV12_BUFFER_CONFIG *frame;
6016   int ref_frame[3];
6017   FRAME_UPDATE_TYPE update_type;
6018 } GF_PICTURE;
6019
6020 static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
6021                             const GF_GROUP *gf_group, int *tpl_group_frames) {
6022   VP9_COMMON *cm = &cpi->common;
6023   int frame_idx = 0;
6024   int i;
6025   int gld_index = -1;
6026   int alt_index = -1;
6027   int lst_index = -1;
6028   int arf_index_stack[MAX_ARF_LAYERS];
6029   int arf_stack_size = 0;
6030   int extend_frame_count = 0;
6031   int pframe_qindex = cpi->tpl_stats[2].base_qindex;
6032   int frame_gop_offset = 0;
6033
6034   RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
6035   int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
6036
6037   memset(recon_frame_index, -1, sizeof(recon_frame_index));
6038   stack_init(arf_index_stack, MAX_ARF_LAYERS);
6039
6040   // TODO(jingning): To be used later for gf frame type parsing.
6041   (void)gf_group;
6042
6043   for (i = 0; i < FRAME_BUFFERS; ++i) {
6044     if (frame_bufs[i].ref_count == 0) {
6045       alloc_frame_mvs(cm, i);
6046       if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
6047                                    cm->subsampling_x, cm->subsampling_y,
6048 #if CONFIG_VP9_HIGHBITDEPTH
6049                                    cm->use_highbitdepth,
6050 #endif
6051                                    VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
6052                                    NULL, NULL, NULL))
6053         vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
6054                            "Failed to allocate frame buffer");
6055
6056       recon_frame_index[frame_idx] = i;
6057       ++frame_idx;
6058
6059       if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
6060     }
6061   }
6062
6063   for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
6064     assert(recon_frame_index[i] >= 0);
6065     cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
6066   }
6067
6068   *tpl_group_frames = 0;
6069
6070   // Initialize Golden reference frame.
6071   gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
6072   for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
6073   gf_picture[0].update_type = gf_group->update_type[0];
6074   gld_index = 0;
6075   ++*tpl_group_frames;
6076
6077   // Initialize base layer ARF frame
6078   gf_picture[1].frame = cpi->Source;
6079   gf_picture[1].ref_frame[0] = gld_index;
6080   gf_picture[1].ref_frame[1] = lst_index;
6081   gf_picture[1].ref_frame[2] = alt_index;
6082   gf_picture[1].update_type = gf_group->update_type[1];
6083   alt_index = 1;
6084   ++*tpl_group_frames;
6085
6086   // Initialize P frames
6087   for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
6088     struct lookahead_entry *buf;
6089     frame_gop_offset = gf_group->frame_gop_index[frame_idx];
6090     buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
6091
6092     if (buf == NULL) break;
6093
6094     gf_picture[frame_idx].frame = &buf->img;
6095     gf_picture[frame_idx].ref_frame[0] = gld_index;
6096     gf_picture[frame_idx].ref_frame[1] = lst_index;
6097     gf_picture[frame_idx].ref_frame[2] = alt_index;
6098     gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];
6099
6100     switch (gf_group->update_type[frame_idx]) {
6101       case ARF_UPDATE:
6102         stack_push(arf_index_stack, alt_index, arf_stack_size);
6103         ++arf_stack_size;
6104         alt_index = frame_idx;
6105         break;
6106       case LF_UPDATE: lst_index = frame_idx; break;
6107       case OVERLAY_UPDATE:
6108         gld_index = frame_idx;
6109         alt_index = stack_pop(arf_index_stack, arf_stack_size);
6110         --arf_stack_size;
6111         break;
6112       case USE_BUF_FRAME:
6113         lst_index = alt_index;
6114         alt_index = stack_pop(arf_index_stack, arf_stack_size);
6115         --arf_stack_size;
6116         break;
6117       default: break;
6118     }
6119
6120     ++*tpl_group_frames;
6121
6122     // The length of group of pictures is baseline_gf_interval, plus the
6123     // beginning golden frame from last GOP, plus the last overlay frame in
6124     // the same GOP.
6125     if (frame_idx == gf_group->gf_group_size) break;
6126   }
6127
6128   alt_index = -1;
6129   ++frame_idx;
6130   ++frame_gop_offset;
6131
6132   // Extend two frames outside the current gf group.
6133   for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
6134     struct lookahead_entry *buf =
6135         vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
6136
6137     if (buf == NULL) break;
6138
6139     cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
6140
6141     gf_picture[frame_idx].frame = &buf->img;
6142     gf_picture[frame_idx].ref_frame[0] = gld_index;
6143     gf_picture[frame_idx].ref_frame[1] = lst_index;
6144     gf_picture[frame_idx].ref_frame[2] = alt_index;
6145     gf_picture[frame_idx].update_type = LF_UPDATE;
6146     lst_index = frame_idx;
6147     ++*tpl_group_frames;
6148     ++extend_frame_count;
6149     ++frame_gop_offset;
6150   }
6151 }
6152
6153 static void init_tpl_stats(VP9_COMP *cpi) {
6154   int frame_idx;
6155   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
6156     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6157     memset(tpl_frame->tpl_stats_ptr, 0,
6158            tpl_frame->height * tpl_frame->width *
6159                sizeof(*tpl_frame->tpl_stats_ptr));
6160     tpl_frame->is_valid = 0;
6161   }
6162 }
6163
6164 #if CONFIG_NON_GREEDY_MV
6165 static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
6166                                          MotionField *motion_field,
6167                                          int frame_idx, uint8_t *cur_frame_buf,
6168                                          uint8_t *ref_frame_buf, int stride,
6169                                          BLOCK_SIZE bsize, int mi_row,
6170                                          int mi_col, MV *mv) {
6171   MACROBLOCK *const x = &td->mb;
6172   MACROBLOCKD *const xd = &x->e_mbd;
6173   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6174   int step_param;
6175   uint32_t bestsme = UINT_MAX;
6176   const MvLimits tmp_mv_limits = x->mv_limits;
6177   // lambda is used to adjust the importance of motion vector consistency.
6178   // TODO(angiebird): Figure out lambda's proper value.
6179   const int lambda = cpi->tpl_stats[frame_idx].lambda;
6180   int_mv nb_full_mvs[NB_MVS_NUM];
6181   int nb_full_mv_num;
6182
6183   MV best_ref_mv1 = { 0, 0 };
6184   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6185
6186   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6187   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6188
6189   // Setup frame pointers
6190   x->plane[0].src.buf = cur_frame_buf;
6191   x->plane[0].src.stride = stride;
6192   xd->plane[0].pre[0].buf = ref_frame_buf;
6193   xd->plane[0].pre[0].stride = stride;
6194
6195   step_param = mv_sf->reduce_first_step_size;
6196   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6197
6198   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6199
6200   nb_full_mv_num =
6201       vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
6202   vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
6203                              lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
6204
6205   /* restore UMV window */
6206   x->mv_limits = tmp_mv_limits;
6207
6208   return bestsme;
6209 }
6210
6211 static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
6212                                         uint8_t *cur_frame_buf,
6213                                         uint8_t *ref_frame_buf, int stride,
6214                                         BLOCK_SIZE bsize, MV *mv) {
6215   MACROBLOCK *const x = &td->mb;
6216   MACROBLOCKD *const xd = &x->e_mbd;
6217   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6218   uint32_t bestsme = UINT_MAX;
6219   uint32_t distortion;
6220   uint32_t sse;
6221   int cost_list[5];
6222
6223   MV best_ref_mv1 = { 0, 0 };
6224
6225   // Setup frame pointers
6226   x->plane[0].src.buf = cur_frame_buf;
6227   x->plane[0].src.stride = stride;
6228   xd->plane[0].pre[0].buf = ref_frame_buf;
6229   xd->plane[0].pre[0].stride = stride;
6230
6231   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6232   // Ignore mv costing by sending NULL pointer instead of cost array
6233   bestsme = cpi->find_fractional_mv_step(
6234       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6235       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6236       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6237       USE_2_TAPS);
6238
6239   return bestsme;
6240 }
6241
6242 #else  // CONFIG_NON_GREEDY_MV
6243 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
6244                                               uint8_t *cur_frame_buf,
6245                                               uint8_t *ref_frame_buf,
6246                                               int stride, BLOCK_SIZE bsize,
6247                                               MV *mv) {
6248   MACROBLOCK *const x = &td->mb;
6249   MACROBLOCKD *const xd = &x->e_mbd;
6250   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
6251   const SEARCH_METHODS search_method = NSTEP;
6252   int step_param;
6253   int sadpb = x->sadperbit16;
6254   uint32_t bestsme = UINT_MAX;
6255   uint32_t distortion;
6256   uint32_t sse;
6257   int cost_list[5];
6258   const MvLimits tmp_mv_limits = x->mv_limits;
6259
6260   MV best_ref_mv1 = { 0, 0 };
6261   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
6262
6263   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
6264   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
6265
6266   // Setup frame pointers
6267   x->plane[0].src.buf = cur_frame_buf;
6268   x->plane[0].src.stride = stride;
6269   xd->plane[0].pre[0].buf = ref_frame_buf;
6270   xd->plane[0].pre[0].stride = stride;
6271
6272   step_param = mv_sf->reduce_first_step_size;
6273   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
6274
6275   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
6276
6277   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
6278                         search_method, sadpb, cond_cost_list(cpi, cost_list),
6279                         &best_ref_mv1, mv, 0, 0);
6280
6281   /* restore UMV window */
6282   x->mv_limits = tmp_mv_limits;
6283
6284   // TODO(yunqing): may use higher tap interp filter than 2 taps.
6285   // Ignore mv costing by sending NULL pointer instead of cost array
6286   bestsme = cpi->find_fractional_mv_step(
6287       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
6288       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
6289       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
6290       USE_2_TAPS);
6291
6292   return bestsme;
6293 }
6294 #endif
6295
6296 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
6297                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
6298   int width = 0, height = 0;
6299   int bw = 4 << b_width_log2_lookup[bsize];
6300   int bh = 4 << b_height_log2_lookup[bsize];
6301
6302   switch (block) {
6303     case 0:
6304       width = grid_pos_col + bw - ref_pos_col;
6305       height = grid_pos_row + bh - ref_pos_row;
6306       break;
6307     case 1:
6308       width = ref_pos_col + bw - grid_pos_col;
6309       height = grid_pos_row + bh - ref_pos_row;
6310       break;
6311     case 2:
6312       width = grid_pos_col + bw - ref_pos_col;
6313       height = ref_pos_row + bh - grid_pos_row;
6314       break;
6315     case 3:
6316       width = ref_pos_col + bw - grid_pos_col;
6317       height = ref_pos_row + bh - grid_pos_row;
6318       break;
6319     default: assert(0);
6320   }
6321
6322   return width * height;
6323 }
6324
// Floor division of |ref_pos| by |bsize_pix| (rounds toward negative
// infinity, unlike C's '/' which truncates toward zero).
static int round_floor(int ref_pos, int bsize_pix) {
  if (ref_pos >= 0) return ref_pos / bsize_pix;

  // Negative positions: shift by one so exact multiples land on their own
  // quotient instead of one below it.
  return -(1 + (-ref_pos - 1) / bsize_pix);
}
6334
6335 static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
6336                             BLOCK_SIZE bsize, int stride) {
6337   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6338   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6339   const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
6340   int idx, idy;
6341
6342   for (idy = 0; idy < mi_height; ++idy) {
6343     for (idx = 0; idx < mi_width; ++idx) {
6344       TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
6345       const int64_t mc_flow = tpl_ptr->mc_flow;
6346       const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
6347       *tpl_ptr = *src_stats;
6348       tpl_ptr->mc_flow = mc_flow;
6349       tpl_ptr->mc_ref_cost = mc_ref_cost;
6350       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
6351     }
6352   }
6353 }
6354
6355 static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6356                                int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6357   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
6358   TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
6359   MV mv = tpl_stats->mv.as_mv;
6360   int mv_row = mv.row >> 3;
6361   int mv_col = mv.col >> 3;
6362
6363   int ref_pos_row = mi_row * MI_SIZE + mv_row;
6364   int ref_pos_col = mi_col * MI_SIZE + mv_col;
6365
6366   const int bw = 4 << b_width_log2_lookup[bsize];
6367   const int bh = 4 << b_height_log2_lookup[bsize];
6368   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6369   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6370   const int pix_num = bw * bh;
6371
6372   // top-left on grid block location in pixel
6373   int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
6374   int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
6375   int block;
6376
6377   for (block = 0; block < 4; ++block) {
6378     int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
6379     int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
6380
6381     if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
6382         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
6383       int overlap_area = get_overlap_area(
6384           grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
6385       int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
6386       int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
6387
6388       int64_t mc_flow = tpl_stats->mc_dep_cost -
6389                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
6390                             tpl_stats->intra_cost;
6391
6392       int idx, idy;
6393
6394       for (idy = 0; idy < mi_height; ++idy) {
6395         for (idx = 0; idx < mi_width; ++idx) {
6396           TplDepStats *des_stats =
6397               &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
6398                          (ref_mi_col + idx)];
6399
6400           des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
6401           des_stats->mc_ref_cost +=
6402               ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
6403               pix_num;
6404           assert(overlap_area >= 0);
6405         }
6406       }
6407     }
6408   }
6409 }
6410
6411 static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6412                              int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6413   int idx, idy;
6414   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6415   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6416
6417   for (idy = 0; idy < mi_height; ++idy) {
6418     for (idx = 0; idx < mi_width; ++idx) {
6419       TplDepStats *tpl_ptr =
6420           &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
6421       tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
6422                          BLOCK_8X8);
6423     }
6424   }
6425 }
6426
6427 static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
6428                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
6429                                TX_SIZE tx_size, int64_t *recon_error,
6430                                int64_t *sse) {
6431   MACROBLOCKD *const xd = &x->e_mbd;
6432   const struct macroblock_plane *const p = &x->plane[plane];
6433   const struct macroblockd_plane *const pd = &xd->plane[plane];
6434   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
6435   uint16_t eob;
6436   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
6437   const int shift = tx_size == TX_32X32 ? 0 : 2;
6438
6439 #if CONFIG_VP9_HIGHBITDEPTH
6440   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6441     vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6442                                  p->quant_fp, qcoeff, dqcoeff, pd->dequant,
6443                                  &eob, scan_order->scan, scan_order->iscan);
6444   } else {
6445     vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6446                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
6447                           scan_order->scan, scan_order->iscan);
6448   }
6449 #else
6450   vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
6451                         qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
6452                         scan_order->iscan);
6453 #endif  // CONFIG_VP9_HIGHBITDEPTH
6454
6455   *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
6456   *recon_error = VPXMAX(*recon_error, 1);
6457
6458   *sse = (*sse) >> shift;
6459   *sse = VPXMAX(*sse, 1);
6460 }
6461
6462 #if CONFIG_VP9_HIGHBITDEPTH
6463 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6464                          TX_SIZE tx_size) {
6465   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
6466   switch (tx_size) {
6467     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
6468     case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
6469     case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
6470     default: assert(0);
6471   }
6472 }
6473 #endif  // CONFIG_VP9_HIGHBITDEPTH
6474
6475 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6476                   TX_SIZE tx_size) {
6477   switch (tx_size) {
6478     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
6479     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
6480     case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
6481     default: assert(0);
6482   }
6483 }
6484
6485 static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
6486                           int mi_col) {
6487   x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6488   x->mv_limits.row_max =
6489       (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
6490   x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6491   x->mv_limits.col_max =
6492       ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
6493 }
6494
6495 static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
6496                             struct scale_factors *sf, GF_PICTURE *gf_picture,
6497                             int frame_idx, TplDepFrame *tpl_frame,
6498                             int16_t *src_diff, tran_low_t *coeff,
6499                             tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
6500                             int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
6501                             YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
6502                             int64_t *recon_error, int64_t *sse) {
6503   VP9_COMMON *cm = &cpi->common;
6504   ThreadData *td = &cpi->td;
6505
6506   const int bw = 4 << b_width_log2_lookup[bsize];
6507   const int bh = 4 << b_height_log2_lookup[bsize];
6508   const int pix_num = bw * bh;
6509   int best_rf_idx = -1;
6510   int_mv best_mv;
6511   int64_t best_inter_cost = INT64_MAX;
6512   int64_t inter_cost;
6513   int rf_idx;
6514   const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
6515
6516   int64_t best_intra_cost = INT64_MAX;
6517   int64_t intra_cost;
6518   PREDICTION_MODE mode;
6519   int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6520   MODE_INFO mi_above, mi_left;
6521   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6522   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6523   TplDepStats *tpl_stats =
6524       &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6525
6526   xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
6527   xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
6528   xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
6529   xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
6530   xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
6531   xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
6532
6533   // Intra prediction search
6534   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
6535     uint8_t *src, *dst;
6536     int src_stride, dst_stride;
6537
6538     src = xd->cur_buf->y_buffer + mb_y_offset;
6539     src_stride = xd->cur_buf->y_stride;
6540
6541     dst = &predictor[0];
6542     dst_stride = bw;
6543
6544     xd->mi[0]->sb_type = bsize;
6545     xd->mi[0]->ref_frame[0] = INTRA_FRAME;
6546
6547     vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
6548                             src_stride, dst, dst_stride, 0, 0, 0);
6549
6550 #if CONFIG_VP9_HIGHBITDEPTH
6551     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6552       vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6553                                 dst_stride, xd->bd);
6554       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6555       intra_cost = vpx_highbd_satd(coeff, pix_num);
6556     } else {
6557       vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6558                          dst_stride);
6559       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6560       intra_cost = vpx_satd(coeff, pix_num);
6561     }
6562 #else
6563     vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
6564     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6565     intra_cost = vpx_satd(coeff, pix_num);
6566 #endif  // CONFIG_VP9_HIGHBITDEPTH
6567
6568     if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
6569   }
6570
6571   // Motion compensated prediction
6572   best_mv.as_int = 0;
6573
6574   set_mv_limits(cm, x, mi_row, mi_col);
6575
6576   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6577     int_mv mv;
6578 #if CONFIG_NON_GREEDY_MV
6579     MotionField *motion_field;
6580 #endif
6581     if (ref_frame[rf_idx] == NULL) continue;
6582
6583 #if CONFIG_NON_GREEDY_MV
6584     (void)td;
6585     motion_field = vp9_motion_field_info_get_motion_field(
6586         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6587     mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6588 #else
6589     motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
6590                                   ref_frame[rf_idx]->y_buffer + mb_y_offset,
6591                                   xd->cur_buf->y_stride, bsize, &mv.as_mv);
6592 #endif
6593
6594 #if CONFIG_VP9_HIGHBITDEPTH
6595     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6596       vp9_highbd_build_inter_predictor(
6597           CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
6598           ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
6599           &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
6600           mi_row * MI_SIZE, xd->bd);
6601       vpx_highbd_subtract_block(
6602           bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
6603           xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
6604       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6605       inter_cost = vpx_highbd_satd(coeff, pix_num);
6606     } else {
6607       vp9_build_inter_predictor(
6608           ref_frame[rf_idx]->y_buffer + mb_y_offset,
6609           ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
6610           0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
6611       vpx_subtract_block(bh, bw, src_diff, bw,
6612                          xd->cur_buf->y_buffer + mb_y_offset,
6613                          xd->cur_buf->y_stride, &predictor[0], bw);
6614       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6615       inter_cost = vpx_satd(coeff, pix_num);
6616     }
6617 #else
6618     vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
6619                               ref_frame[rf_idx]->y_stride, &predictor[0], bw,
6620                               &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
6621                               mi_col * MI_SIZE, mi_row * MI_SIZE);
6622     vpx_subtract_block(bh, bw, src_diff, bw,
6623                        xd->cur_buf->y_buffer + mb_y_offset,
6624                        xd->cur_buf->y_stride, &predictor[0], bw);
6625     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6626     inter_cost = vpx_satd(coeff, pix_num);
6627 #endif
6628
6629     if (inter_cost < best_inter_cost) {
6630       best_rf_idx = rf_idx;
6631       best_inter_cost = inter_cost;
6632       best_mv.as_int = mv.as_int;
6633       get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
6634                          sse);
6635     }
6636   }
6637   best_intra_cost = VPXMAX(best_intra_cost, 1);
6638   best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
6639   tpl_stats->inter_cost = VPXMAX(
6640       1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6641   tpl_stats->intra_cost = VPXMAX(
6642       1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6643   tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
6644   tpl_stats->mv.as_int = best_mv.as_int;
6645 }
6646
6647 #if CONFIG_NON_GREEDY_MV
6648 static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
6649                                   int frame_idx, int rf_idx, int mi_row,
6650                                   int mi_col, struct buf_2d *src,
6651                                   struct buf_2d *pre) {
6652   const int mb_y_offset =
6653       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6654   YV12_BUFFER_CONFIG *ref_frame = NULL;
6655   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6656   if (ref_frame_idx != -1) {
6657     ref_frame = gf_picture[ref_frame_idx].frame;
6658     src->buf = xd->cur_buf->y_buffer + mb_y_offset;
6659     src->stride = xd->cur_buf->y_stride;
6660     pre->buf = ref_frame->y_buffer + mb_y_offset;
6661     pre->stride = ref_frame->y_stride;
6662     assert(src->stride == pre->stride);
6663     return 1;
6664   } else {
6665     printf("invalid ref_frame_idx");
6666     assert(ref_frame_idx != -1);
6667     return 0;
6668   }
6669 }
6670
6671 #define kMvPreCheckLines 5
6672 #define kMvPreCheckSize 15
6673
6674 #define MV_REF_POS_NUM 3
6675 POSITION mv_ref_pos[MV_REF_POS_NUM] = {
6676   { -1, 0 },
6677   { 0, -1 },
6678   { -1, -1 },
6679 };
6680
6681 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
6682                              int mi_col) {
6683   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
6684 }
6685
6686 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
6687                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
6688   int i;
6689   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6690   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6691   int_mv nearest_mv, near_mv, invalid_mv;
6692   nearest_mv.as_int = INVALID_MV;
6693   near_mv.as_int = INVALID_MV;
6694   invalid_mv.as_int = INVALID_MV;
6695   for (i = 0; i < MV_REF_POS_NUM; ++i) {
6696     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
6697     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
6698     assert(mv_ref_pos[i].row <= 0);
6699     assert(mv_ref_pos[i].col <= 0);
6700     if (nb_row >= 0 && nb_col >= 0) {
6701       if (nearest_mv.as_int == INVALID_MV) {
6702         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6703       } else {
6704         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6705         if (mv.as_int == nearest_mv.as_int) {
6706           continue;
6707         } else {
6708           near_mv = mv;
6709           break;
6710         }
6711       }
6712     }
6713   }
6714   if (nearest_mv.as_int == INVALID_MV) {
6715     nearest_mv.as_mv.row = 0;
6716     nearest_mv.as_mv.col = 0;
6717   }
6718   if (near_mv.as_int == INVALID_MV) {
6719     near_mv.as_mv.row = 0;
6720     near_mv.as_mv.col = 0;
6721   }
6722   if (mv_mode == NEAREST_MV_MODE) {
6723     return nearest_mv;
6724   }
6725   if (mv_mode == NEAR_MV_MODE) {
6726     return near_mv;
6727   }
6728   assert(0);
6729   return invalid_mv;
6730 }
6731
6732 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
6733                                   MotionField *motion_field,
6734                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
6735                                   int mi_row, int mi_col) {
6736   int_mv mv;
6737   switch (mv_mode) {
6738     case ZERO_MV_MODE:
6739       mv.as_mv.row = 0;
6740       mv.as_mv.col = 0;
6741       break;
6742     case NEW_MV_MODE:
6743       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6744       break;
6745     case NEAREST_MV_MODE:
6746       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6747       break;
6748     case NEAR_MV_MODE:
6749       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6750       break;
6751     default:
6752       mv.as_int = INVALID_MV;
6753       assert(0);
6754       break;
6755   }
6756   return mv;
6757 }
6758
6759 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
6760                           GF_PICTURE *gf_picture, MotionField *motion_field,
6761                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6762                           BLOCK_SIZE bsize, int mi_row, int mi_col,
6763                           int_mv *mv) {
6764   uint32_t sse;
6765   struct buf_2d src;
6766   struct buf_2d pre;
6767   MV full_mv;
6768   *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
6769                             mi_row, mi_col);
6770   full_mv = get_full_mv(&mv->as_mv);
6771   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
6772                              &src, &pre)) {
6773     // TODO(angiebird): Consider subpixel when computing the sse.
6774     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
6775                           pre.stride, &sse);
6776     return (double)(sse << VP9_DIST_SCALE_LOG2);
6777   } else {
6778     assert(0);
6779     return 0;
6780   }
6781 }
6782
6783 static int get_mv_mode_cost(int mv_mode) {
6784   // TODO(angiebird): The probabilities are roughly inferred from
6785   // default_inter_mode_probs. Check if there is a better way to set the
6786   // probabilities.
6787   const int zero_mv_prob = 16;
6788   const int new_mv_prob = 24 * 1;
6789   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
6790   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
6791   switch (mv_mode) {
6792     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
6793     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
6794     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6795     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6796     default: assert(0); return -1;
6797   }
6798 }
6799
6800 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
6801   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
6802                         log2(1 + abs(new_mv->col - ref_mv->col));
6803   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
6804   return mv_diff_cost;
6805 }
6806 static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
6807                           TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
6808                           int mi_col) {
6809   double mv_cost = get_mv_mode_cost(mv_mode);
6810   if (mv_mode == NEW_MV_MODE) {
6811     MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
6812                                     bsize, mi_row, mi_col)
6813                     .as_mv;
6814     MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
6815                                         tpl_frame, bsize, mi_row, mi_col)
6816                         .as_mv;
6817     MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
6818                                      bsize, mi_row, mi_col)
6819                      .as_mv;
6820     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
6821     double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
6822     mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
6823   }
6824   return mv_cost;
6825 }
6826
6827 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
6828                            GF_PICTURE *gf_picture, MotionField *motion_field,
6829                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6830                            BLOCK_SIZE bsize, int mi_row, int mi_col,
6831                            int_mv *mv) {
6832   MACROBLOCKD *xd = &x->e_mbd;
6833   double mv_dist =
6834       get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
6835                   tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
6836   double mv_cost =
6837       get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
6838   double mult = 180;
6839
6840   return mv_cost + mult * log2f(1 + mv_dist);
6841 }
6842
6843 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6844                                  GF_PICTURE *gf_picture,
6845                                  MotionField *motion_field, int frame_idx,
6846                                  TplDepFrame *tpl_frame, int rf_idx,
6847                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
6848                                  double *rd, int_mv *mv) {
6849   int best_mv_mode = ZERO_MV_MODE;
6850   int update = 0;
6851   int mv_mode;
6852   *rd = 0;
6853   for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
6854     double this_rd;
6855     int_mv this_mv;
6856     if (mv_mode == NEW_MV_MODE) {
6857       continue;
6858     }
6859     this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
6860                            tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
6861     if (update == 0) {
6862       *rd = this_rd;
6863       *mv = this_mv;
6864       best_mv_mode = mv_mode;
6865       update = 1;
6866     } else {
6867       if (this_rd < *rd) {
6868         *rd = this_rd;
6869         *mv = this_mv;
6870         best_mv_mode = mv_mode;
6871       }
6872     }
6873   }
6874   return best_mv_mode;
6875 }
6876
6877 static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6878                             GF_PICTURE *gf_picture, MotionField *motion_field,
6879                             int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6880                             BLOCK_SIZE bsize, int mi_row, int mi_col) {
6881   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6882   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6883   int tmp_mv_mode_arr[kMvPreCheckSize];
6884   int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
6885   double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
6886   int_mv *select_mv_arr = cpi->select_mv_arr;
6887   int_mv tmp_select_mv_arr[kMvPreCheckSize];
6888   int stride = tpl_frame->stride;
6889   double new_mv_rd = 0;
6890   double no_new_mv_rd = 0;
6891   double this_new_mv_rd = 0;
6892   double this_no_new_mv_rd = 0;
6893   int idx;
6894   int tmp_idx;
6895   assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);
6896
6897   // no new mv
6898   // diagonal scan order
6899   tmp_idx = 0;
6900   for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6901     int r;
6902     for (r = 0; r <= idx; ++r) {
6903       int c = idx - r;
6904       int nb_row = mi_row + r * mi_height;
6905       int nb_col = mi_col + c * mi_width;
6906       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6907         double this_rd;
6908         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6909         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6910             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6911             bsize, nb_row, nb_col, &this_rd, mv);
6912         if (r == 0 && c == 0) {
6913           this_no_new_mv_rd = this_rd;
6914         }
6915         no_new_mv_rd += this_rd;
6916         tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
6917         tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
6918         ++tmp_idx;
6919       }
6920     }
6921   }
6922
6923   // new mv
6924   mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
6925   this_new_mv_rd = eval_mv_mode(
6926       NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6927       rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
6928   new_mv_rd = this_new_mv_rd;
6929   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
6930   // beforehand.
6931   for (idx = 1; idx < kMvPreCheckLines; ++idx) {
6932     int r;
6933     for (r = 0; r <= idx; ++r) {
6934       int c = idx - r;
6935       int nb_row = mi_row + r * mi_height;
6936       int nb_col = mi_col + c * mi_width;
6937       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6938         double this_rd;
6939         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6940         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6941             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6942             bsize, nb_row, nb_col, &this_rd, mv);
6943         new_mv_rd += this_rd;
6944       }
6945     }
6946   }
6947
6948   // update best_mv_mode
6949   tmp_idx = 0;
6950   if (no_new_mv_rd < new_mv_rd) {
6951     for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6952       int r;
6953       for (r = 0; r <= idx; ++r) {
6954         int c = idx - r;
6955         int nb_row = mi_row + r * mi_height;
6956         int nb_col = mi_col + c * mi_width;
6957         if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6958           mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
6959           select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
6960           ++tmp_idx;
6961         }
6962       }
6963     }
6964     rd_diff_arr[mi_row * stride + mi_col] = 0;
6965   } else {
6966     rd_diff_arr[mi_row * stride + mi_col] =
6967         (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
6968   }
6969 }
6970
6971 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
6972                                 GF_PICTURE *gf_picture,
6973                                 MotionField *motion_field, int frame_idx,
6974                                 TplDepFrame *tpl_frame, int rf_idx,
6975                                 BLOCK_SIZE bsize) {
6976   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6977   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6978   const int unit_rows = tpl_frame->mi_rows / mi_height;
6979   const int unit_cols = tpl_frame->mi_cols / mi_width;
6980   const int max_diagonal_lines = unit_rows + unit_cols - 1;
6981   int idx;
6982   for (idx = 0; idx < max_diagonal_lines; ++idx) {
6983     int r;
6984     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
6985          ++r) {
6986       int c = idx - r;
6987       int mi_row = r * mi_height;
6988       int mi_col = c * mi_width;
6989       assert(c >= 0 && c < unit_cols);
6990       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
6991       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
6992       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6993                       rf_idx, bsize, mi_row, mi_col);
6994     }
6995   }
6996 }
6997
6998 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
6999                              MotionField *motion_field, int frame_idx,
7000                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
7001                              int mi_row, int mi_col) {
7002   VP9_COMMON *cm = &cpi->common;
7003   MACROBLOCK *x = &td->mb;
7004   MACROBLOCKD *xd = &x->e_mbd;
7005   const int mb_y_offset =
7006       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
7007   assert(ref_frame != NULL);
7008   set_mv_limits(cm, x, mi_row, mi_col);
7009   {
7010     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
7011     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
7012     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
7013     const int stride = xd->cur_buf->y_stride;
7014     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
7015                              ref_frame_buf, stride, bsize, mi_row, mi_col,
7016                              &mv.as_mv);
7017     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
7018                             bsize, &mv.as_mv);
7019     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
7020   }
7021 }
7022
7023 static void build_motion_field(
7024     VP9_COMP *cpi, int frame_idx,
7025     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
7026   VP9_COMMON *cm = &cpi->common;
7027   ThreadData *td = &cpi->td;
7028   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7029   const int mi_height = num_8x8_blocks_high_lookup[bsize];
7030   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7031   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
7032   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
7033   int mi_row, mi_col;
7034   int rf_idx;
7035
7036   tpl_frame->lambda = (pw * ph) >> 2;
7037   assert(pw * ph == tpl_frame->lambda << 2);
7038
7039   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7040     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
7041         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
7042     if (ref_frame[rf_idx] == NULL) {
7043       continue;
7044     }
7045     vp9_motion_field_reset_mvs(motion_field);
7046     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7047       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7048         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
7049                          bsize, mi_row, mi_col);
7050       }
7051     }
7052   }
7053 }
7054 #endif  // CONFIG_NON_GREEDY_MV
7055
7056 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
7057                               int frame_idx, BLOCK_SIZE bsize) {
7058   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7059   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
7060   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
7061
7062   VP9_COMMON *cm = &cpi->common;
7063   struct scale_factors sf;
7064   int rdmult, idx;
7065   ThreadData *td = &cpi->td;
7066   MACROBLOCK *x = &td->mb;
7067   MACROBLOCKD *xd = &x->e_mbd;
7068   int mi_row, mi_col;
7069
7070 #if CONFIG_VP9_HIGHBITDEPTH
7071   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
7072   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
7073   uint8_t *predictor;
7074 #else
7075   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
7076 #endif
7077   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
7078   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
7079   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
7080   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
7081
7082   const TX_SIZE tx_size = max_txsize_lookup[bsize];
7083   const int mi_height = num_8x8_blocks_high_lookup[bsize];
7084   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7085   int64_t recon_error, sse;
7086 #if CONFIG_NON_GREEDY_MV
7087   int square_block_idx;
7088   int rf_idx;
7089 #endif
7090
7091   // Setup scaling factor
7092 #if CONFIG_VP9_HIGHBITDEPTH
7093   vp9_setup_scale_factors_for_frame(
7094       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
7095       this_frame->y_crop_width, this_frame->y_crop_height,
7096       cpi->common.use_highbitdepth);
7097
7098   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
7099     predictor = CONVERT_TO_BYTEPTR(predictor16);
7100   else
7101     predictor = predictor8;
7102 #else
7103   vp9_setup_scale_factors_for_frame(
7104       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
7105       this_frame->y_crop_width, this_frame->y_crop_height);
7106 #endif  // CONFIG_VP9_HIGHBITDEPTH
7107
7108   // Prepare reference frame pointers. If any reference frame slot is
7109   // unavailable, the pointer will be set to Null.
7110   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
7111     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
7112     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
7113   }
7114
7115   xd->mi = cm->mi_grid_visible;
7116   xd->mi[0] = cm->mi;
7117   xd->cur_buf = this_frame;
7118
7119   // Get rd multiplier set up.
7120   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
7121   set_error_per_bit(&cpi->td.mb, rdmult);
7122   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
7123
7124   tpl_frame->is_valid = 1;
7125
7126   cm->base_qindex = tpl_frame->base_qindex;
7127   vp9_frame_init_quantizer(cpi);
7128
7129 #if CONFIG_NON_GREEDY_MV
7130   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
7131        ++square_block_idx) {
7132     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
7133     build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
7134   }
7135   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7136     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
7137     if (ref_frame_idx != -1) {
7138       MotionField *motion_field = vp9_motion_field_info_get_motion_field(
7139           &cpi->motion_field_info, frame_idx, rf_idx, bsize);
7140       predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
7141                           tpl_frame, rf_idx, bsize);
7142     }
7143   }
7144 #endif
7145
7146   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7147     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7148       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
7149                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
7150                       tx_size, ref_frame, predictor, &recon_error, &sse);
7151       // Motion flow dependency dispenser.
7152       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
7153                       tpl_frame->stride);
7154
7155       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
7156                        bsize);
7157     }
7158   }
7159 }
7160
7161 #if CONFIG_NON_GREEDY_MV
7162 #define DUMP_TPL_STATS 0
7163 #if DUMP_TPL_STATS
// Debug helper: prints "h w" on one line, then the h x w window of buf whose
// top-left sample is at (row, col), as space-separated decimals in raster
// order, followed by a newline.
static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
  int y;
  printf("%d %d\n", h, w);
  for (y = 0; y < h; ++y) {
    const int line_start = (row + y) * stride + col;
    int x;
    for (x = 0; x < w; ++x) {
      printf("%d ", buf[line_start + x]);
    }
  }
  printf("\n");
}
7174
7175 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
7176   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
7177            frame_buf->y_width);
7178   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
7179            frame_buf->uv_height, frame_buf->uv_width);
7180   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
7181            frame_buf->uv_height, frame_buf->uv_width);
7182 }
7183
7184 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
7185                            const GF_GROUP *gf_group,
7186                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
7187   int frame_idx;
7188   const VP9_COMMON *cm = &cpi->common;
7189   int rf_idx;
7190   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
7191     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7192       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
7193       int mi_row, mi_col;
7194       int ref_frame_idx;
7195       const int mi_height = num_8x8_blocks_high_lookup[bsize];
7196       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
7197       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
7198       if (ref_frame_idx != -1) {
7199         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
7200         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
7201         const int ref_gf_frame_offset =
7202             gf_group->frame_gop_index[ref_frame_idx];
7203         printf("=\n");
7204         printf(
7205             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
7206             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
7207             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
7208             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
7209         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7210           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7211             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7212               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
7213                                                        frame_idx, rf_idx, bsize,
7214                                                        mi_row, mi_col);
7215               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
7216                      mv.as_mv.col);
7217             }
7218           }
7219         }
7220         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
7221           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
7222             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
7223               const TplDepStats *tpl_ptr =
7224                   &tpl_frame
7225                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
7226               printf("%f ", tpl_ptr->feature_score);
7227             }
7228           }
7229         }
7230         printf("\n");
7231
7232         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
7233           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
7234             const int mv_mode =
7235                 tpl_frame
7236                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
7237             printf("%d ", mv_mode);
7238           }
7239         }
7240         printf("\n");
7241
7242         dump_frame_buf(gf_picture[frame_idx].frame);
7243         dump_frame_buf(ref_frame_buf);
7244       }
7245     }
7246   }
7247 }
7248 #endif  // DUMP_TPL_STATS
7249 #endif  // CONFIG_NON_GREEDY_MV
7250
7251 static void init_tpl_buffer(VP9_COMP *cpi) {
7252   VP9_COMMON *cm = &cpi->common;
7253   int frame;
7254
7255   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7256   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7257 #if CONFIG_NON_GREEDY_MV
7258   int rf_idx;
7259
7260   vpx_free(cpi->select_mv_arr);
7261   CHECK_MEM_ERROR(
7262       cm, cpi->select_mv_arr,
7263       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
7264 #endif
7265
7266   // TODO(jingning): Reduce the actual memory use for tpl model build up.
7267   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7268     if (cpi->tpl_stats[frame].width >= mi_cols &&
7269         cpi->tpl_stats[frame].height >= mi_rows &&
7270         cpi->tpl_stats[frame].tpl_stats_ptr)
7271       continue;
7272
7273 #if CONFIG_NON_GREEDY_MV
7274     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7275       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7276       CHECK_MEM_ERROR(
7277           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
7278           vpx_calloc(mi_rows * mi_cols * 4,
7279                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
7280       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7281       CHECK_MEM_ERROR(
7282           cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
7283           vpx_calloc(mi_rows * mi_cols * 4,
7284                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
7285     }
7286 #endif
7287     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7288     CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
7289                     vpx_calloc(mi_rows * mi_cols,
7290                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
7291     cpi->tpl_stats[frame].is_valid = 0;
7292     cpi->tpl_stats[frame].width = mi_cols;
7293     cpi->tpl_stats[frame].height = mi_rows;
7294     cpi->tpl_stats[frame].stride = mi_cols;
7295     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
7296     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
7297   }
7298
7299   for (frame = 0; frame < REF_FRAMES; ++frame) {
7300     cpi->enc_frame_buf[frame].mem_valid = 0;
7301     cpi->enc_frame_buf[frame].released = 1;
7302   }
7303 }
7304
7305 static void free_tpl_buffer(VP9_COMP *cpi) {
7306   int frame;
7307 #if CONFIG_NON_GREEDY_MV
7308   vp9_free_motion_field_info(&cpi->motion_field_info);
7309   vpx_free(cpi->select_mv_arr);
7310 #endif
7311   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7312 #if CONFIG_NON_GREEDY_MV
7313     int rf_idx;
7314     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7315       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7316       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7317     }
7318 #endif
7319     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7320     cpi->tpl_stats[frame].is_valid = 0;
7321   }
7322 }
7323
7324 static void setup_tpl_stats(VP9_COMP *cpi) {
7325   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
7326   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7327   int tpl_group_frames = 0;
7328   int frame_idx;
7329   cpi->tpl_bsize = BLOCK_32X32;
7330
7331   init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
7332
7333   init_tpl_stats(cpi);
7334
7335   // Backward propagation from tpl_group_frames to 1.
7336   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
7337     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
7338     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
7339   }
7340 #if CONFIG_NON_GREEDY_MV
7341   cpi->tpl_ready = 1;
7342 #if DUMP_TPL_STATS
7343   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
7344 #endif  // DUMP_TPL_STATS
7345 #endif  // CONFIG_NON_GREEDY_MV
7346 }
7347
7348 #if !CONFIG_REALTIME_ONLY
7349 #if CONFIG_RATE_CTRL
7350 static void copy_frame_counts(const FRAME_COUNTS *input_counts,
7351                               FRAME_COUNTS *output_counts) {
7352   int i, j, k, l, m, n;
7353   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
7354     for (j = 0; j < INTRA_MODES; ++j) {
7355       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
7356     }
7357   }
7358   for (i = 0; i < INTRA_MODES; ++i) {
7359     for (j = 0; j < INTRA_MODES; ++j) {
7360       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
7361     }
7362   }
7363   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
7364     for (j = 0; j < PARTITION_TYPES; ++j) {
7365       output_counts->partition[i][j] = input_counts->partition[i][j];
7366     }
7367   }
7368   for (i = 0; i < TX_SIZES; ++i) {
7369     for (j = 0; j < PLANE_TYPES; ++j) {
7370       for (k = 0; k < REF_TYPES; ++k) {
7371         for (l = 0; l < COEF_BANDS; ++l) {
7372           for (m = 0; m < COEFF_CONTEXTS; ++m) {
7373             output_counts->eob_branch[i][j][k][l][m] =
7374                 input_counts->eob_branch[i][j][k][l][m];
7375             for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) {
7376               output_counts->coef[i][j][k][l][m][n] =
7377                   input_counts->coef[i][j][k][l][m][n];
7378             }
7379           }
7380         }
7381       }
7382     }
7383   }
7384   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
7385     for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
7386       output_counts->switchable_interp[i][j] =
7387           input_counts->switchable_interp[i][j];
7388     }
7389   }
7390   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
7391     for (j = 0; j < INTER_MODES; ++j) {
7392       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
7393     }
7394   }
7395   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
7396     for (j = 0; j < 2; ++j) {
7397       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
7398     }
7399   }
7400   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
7401     for (j = 0; j < 2; ++j) {
7402       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
7403     }
7404   }
7405   for (i = 0; i < REF_CONTEXTS; ++i) {
7406     for (j = 0; j < 2; ++j) {
7407       for (k = 0; k < 2; ++k) {
7408         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
7409       }
7410     }
7411   }
7412   for (i = 0; i < REF_CONTEXTS; ++i) {
7413     for (j = 0; j < 2; ++j) {
7414       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
7415     }
7416   }
7417   for (i = 0; i < SKIP_CONTEXTS; ++i) {
7418     for (j = 0; j < 2; ++j) {
7419       output_counts->skip[i][j] = input_counts->skip[i][j];
7420     }
7421   }
7422   for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
7423     for (j = 0; j < TX_SIZES; j++) {
7424       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
7425     }
7426     for (j = 0; j < TX_SIZES - 1; j++) {
7427       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
7428     }
7429     for (j = 0; j < TX_SIZES - 2; j++) {
7430       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
7431     }
7432   }
7433   for (i = 0; i < TX_SIZES; i++) {
7434     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
7435   }
7436   for (i = 0; i < MV_JOINTS; i++) {
7437     output_counts->mv.joints[i] = input_counts->mv.joints[i];
7438   }
7439   for (k = 0; k < 2; k++) {
7440     nmv_component_counts *const comps = &output_counts->mv.comps[k];
7441     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
7442     for (i = 0; i < 2; i++) {
7443       comps->sign[i] = comps_t->sign[i];
7444       comps->class0_hp[i] = comps_t->class0_hp[i];
7445       comps->hp[i] = comps_t->hp[i];
7446     }
7447     for (i = 0; i < MV_CLASSES; i++) {
7448       comps->classes[i] = comps_t->classes[i];
7449     }
7450     for (i = 0; i < CLASS0_SIZE; i++) {
7451       comps->class0[i] = comps_t->class0[i];
7452       for (j = 0; j < MV_FP_SIZE; j++) {
7453         comps->class0_fp[i][j] = comps_t->class0_fp[i][j];
7454       }
7455     }
7456     for (i = 0; i < MV_OFFSET_BITS; i++) {
7457       for (j = 0; j < 2; j++) {
7458         comps->bits[i][j] = comps_t->bits[i][j];
7459       }
7460     }
7461     for (i = 0; i < MV_FP_SIZE; i++) {
7462       comps->fp[i] = comps_t->fp[i];
7463     }
7464   }
7465 }
7466
7467 static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
7468                                         IMAGE_BUFFER *image_buffer) {
7469   const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,
7470                                    yv12_buffer->v_buffer };
7471   const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,
7472                                  yv12_buffer->uv_stride };
7473   const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,
7474                         yv12_buffer->uv_crop_width };
7475   const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,
7476                         yv12_buffer->uv_crop_height };
7477   int plane;
7478   for (plane = 0; plane < 3; ++plane) {
7479     const int src_stride = src_stride_ls[plane];
7480     const int w = w_ls[plane];
7481     const int h = h_ls[plane];
7482     const uint8_t *src_buf = src_buf_ls[plane];
7483     uint8_t *dst_buf = image_buffer->plane_buffer[plane];
7484     int r;
7485     assert(image_buffer->plane_width[plane] == w);
7486     assert(image_buffer->plane_height[plane] == h);
7487     for (r = 0; r < h; ++r) {
7488       memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);
7489       src_buf += src_stride;
7490       dst_buf += w;
7491     }
7492   }
7493 }
7494 #endif  // CONFIG_RATE_CTRL
7495 static void update_encode_frame_result(
7496     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
7497     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
7498     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
7499     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
7500 #if CONFIG_RATE_CTRL
7501     const PARTITION_INFO *partition_info,
7502     const MOTION_VECTOR_INFO *motion_vector_info,
7503 #endif  // CONFIG_RATE_CTRL
7504     ENCODE_FRAME_RESULT *encode_frame_result) {
7505 #if CONFIG_RATE_CTRL
7506   PSNR_STATS psnr;
7507 #if CONFIG_VP9_HIGHBITDEPTH
7508   vpx_calc_highbd_psnr(source_frame, coded_frame_buf->buf, &psnr, bit_depth,
7509                        input_bit_depth);
7510 #else   // CONFIG_VP9_HIGHBITDEPTH
7511   (void)bit_depth;
7512   (void)input_bit_depth;
7513   vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);
7514 #endif  // CONFIG_VP9_HIGHBITDEPTH
7515   encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
7516
7517   if (update_type != KF_UPDATE) {
7518     const VP9_REFFRAME inter_ref_flags[MAX_INTER_REF_FRAMES] = { VP9_LAST_FLAG,
7519                                                                  VP9_GOLD_FLAG,
7520                                                                  VP9_ALT_FLAG };
7521     int i;
7522     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7523       assert(ref_frame_bufs[i] != NULL);
7524       encode_frame_result->ref_frame_coding_indexes[i] =
7525           ref_frame_bufs[i]->frame_coding_index;
7526       encode_frame_result->ref_frame_valid_list[i] =
7527           (ref_frame_flags & inter_ref_flags[i]) != 0;
7528     }
7529   } else {
7530     // No reference frame is available when this is a key frame.
7531     int i;
7532     for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
7533       encode_frame_result->ref_frame_coding_indexes[i] = -1;
7534       encode_frame_result->ref_frame_valid_list[i] = 0;
7535     }
7536   }
7537   encode_frame_result->psnr = psnr.psnr[0];
7538   encode_frame_result->sse = psnr.sse[0];
7539   copy_frame_counts(counts, &encode_frame_result->frame_counts);
7540   encode_frame_result->partition_info = partition_info;
7541   encode_frame_result->motion_vector_info = motion_vector_info;
7542   if (encode_frame_result->coded_frame.allocated) {
7543     yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
7544                                 &encode_frame_result->coded_frame);
7545   }
7546 #else   // CONFIG_RATE_CTRL
7547   (void)ref_frame_flags;
7548   (void)bit_depth;
7549   (void)input_bit_depth;
7550   (void)source_frame;
7551   (void)coded_frame_buf;
7552   (void)ref_frame_bufs;
7553   (void)counts;
7554 #endif  // CONFIG_RATE_CTRL
7555   encode_frame_result->show_idx = coded_frame_buf->frame_index;
7556   encode_frame_result->update_type = update_type;
7557   encode_frame_result->quantize_index = quantize_index;
7558 }
7559 #endif  // !CONFIG_REALTIME_ONLY
7560
7561 void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
7562   encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
7563 #if CONFIG_RATE_CTRL
7564   encode_frame_result->frame_coding_index = -1;
7565   vp9_zero(encode_frame_result->coded_frame);
7566   encode_frame_result->coded_frame.allocated = 0;
7567 #endif  // CONFIG_RATE_CTRL
7568 }
7569
7570 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
7571                             size_t *size, uint8_t *dest, int64_t *time_stamp,
7572                             int64_t *time_end, int flush,
7573                             ENCODE_FRAME_RESULT *encode_frame_result) {
7574   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
7575   VP9_COMMON *const cm = &cpi->common;
7576   BufferPool *const pool = cm->buffer_pool;
7577   RATE_CONTROL *const rc = &cpi->rc;
7578   struct vpx_usec_timer cmptimer;
7579   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
7580   struct lookahead_entry *last_source = NULL;
7581   struct lookahead_entry *source = NULL;
7582   int arf_src_index;
7583   const int gf_group_index = cpi->twopass.gf_group.index;
7584   int i;
7585
7586   if (is_one_pass_cbr_svc(cpi)) {
7587     vp9_one_pass_cbr_svc_start_layer(cpi);
7588   }
7589
7590   vpx_usec_timer_start(&cmptimer);
7591
7592   vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
7593
7594   // Is multi-arf enabled.
7595   // Note that at the moment multi_arf is only configured for 2 pass VBR and
7596   // will not work properly with svc.
7597   // Enable the Jingning's new "multi_layer_arf" code if "enable_auto_arf"
7598   // is greater than or equal to 2.
7599   if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
7600     cpi->multi_layer_arf = 1;
7601   else
7602     cpi->multi_layer_arf = 0;
7603
7604   // Normal defaults
7605   cm->reset_frame_context = 0;
7606   cm->refresh_frame_context = 1;
7607   if (!is_one_pass_cbr_svc(cpi)) {
7608     cpi->refresh_last_frame = 1;
7609     cpi->refresh_golden_frame = 0;
7610     cpi->refresh_alt_ref_frame = 0;
7611   }
7612
7613   // Should we encode an arf frame.
7614   arf_src_index = get_arf_src_index(cpi);
7615
7616   if (arf_src_index) {
7617     for (i = 0; i <= arf_src_index; ++i) {
7618       struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
7619       // Avoid creating an alt-ref if there's a forced keyframe pending.
7620       if (e == NULL) {
7621         break;
7622       } else if (e->flags == VPX_EFLAG_FORCE_KF) {
7623         arf_src_index = 0;
7624         flush = 1;
7625         break;
7626       }
7627     }
7628   }
7629
7630   // Clear arf index stack before group of pictures processing starts.
7631   if (gf_group_index == 1) {
7632     stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
7633     cpi->twopass.gf_group.stack_size = 0;
7634   }
7635
7636   if (arf_src_index) {
7637     assert(arf_src_index <= rc->frames_to_key);
7638     if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
7639       cpi->alt_ref_source = source;
7640
7641 #if !CONFIG_REALTIME_ONLY
7642       if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
7643           (oxcf->arnr_strength > 0)) {
7644         int bitrate = cpi->rc.avg_frame_bandwidth / 40;
7645         int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;
7646
7647         int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1);
7648         not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME;
7649
7650         // Produce the filtered ARF frame.
7651         vp9_temporal_filter(cpi, arf_src_index);
7652         vpx_extend_frame_borders(&cpi->alt_ref_buffer);
7653
7654         // for small bitrates segmentation overhead usually
7655         // eats all bitrate gain from enabling delta quantizers
7656         if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
7657           vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);
7658
7659         force_src_buffer = &cpi->alt_ref_buffer;
7660       }
7661 #endif
7662       cm->show_frame = 0;
7663       cm->intra_only = 0;
7664       cpi->refresh_alt_ref_frame = 1;
7665       cpi->refresh_golden_frame = 0;
7666       cpi->refresh_last_frame = 0;
7667       rc->is_src_frame_alt_ref = 0;
7668       rc->source_alt_ref_pending = 0;
7669     } else {
7670       rc->source_alt_ref_pending = 0;
7671     }
7672   }
7673
7674   if (!source) {
7675     // Get last frame source.
7676     if (cm->current_video_frame > 0) {
7677       if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
7678         return -1;
7679     }
7680
7681     // Read in the source frame.
7682     if (cpi->use_svc || cpi->svc.set_intra_only_frame)
7683       source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
7684     else
7685       source = vp9_lookahead_pop(cpi->lookahead, flush);
7686
7687     if (source != NULL) {
7688       cm->show_frame = 1;
7689       cm->intra_only = 0;
7690       // If the flags indicate intra frame, but if the current picture is for
7691       // spatial layer above first_spatial_layer_to_encode, it should not be an
7692       // intra picture.
7693       if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
7694           cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
7695         source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
7696       }
7697
7698       // Check to see if the frame should be encoded as an arf overlay.
7699       check_src_altref(cpi, source);
7700     }
7701   }
7702
7703   if (source) {
7704     cpi->un_scaled_source = cpi->Source =
7705         force_src_buffer ? force_src_buffer : &source->img;
7706
7707 #ifdef ENABLE_KF_DENOISE
7708     // Copy of raw source for metrics calculation.
7709     if (is_psnr_calc_enabled(cpi))
7710       vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
7711 #endif
7712
7713     cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
7714
7715     *time_stamp = source->ts_start;
7716     *time_end = source->ts_end;
7717     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
7718   } else {
7719     *size = 0;
7720     return -1;
7721   }
7722
7723   if (source->ts_start < cpi->first_time_stamp_ever) {
7724     cpi->first_time_stamp_ever = source->ts_start;
7725     cpi->last_end_time_stamp_seen = source->ts_start;
7726   }
7727
7728   // Clear down mmx registers
7729   vpx_clear_system_state();
7730
7731   // adjust frame rates based on timestamps given
7732   if (cm->show_frame) {
7733     if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
7734         cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
7735       vp9_svc_adjust_frame_rate(cpi);
7736     else
7737       adjust_frame_rate(cpi, source);
7738   }
7739
7740   if (is_one_pass_cbr_svc(cpi)) {
7741     vp9_update_temporal_layer_framerate(cpi);
7742     vp9_restore_layer_context(cpi);
7743   }
7744
7745   // Find a free buffer for the new frame, releasing the reference previously
7746   // held.
7747   if (cm->new_fb_idx != INVALID_IDX) {
7748     --pool->frame_bufs[cm->new_fb_idx].ref_count;
7749   }
7750   cm->new_fb_idx = get_free_fb(cm);
7751
7752   if (cm->new_fb_idx == INVALID_IDX) return -1;
7753
7754   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
7755
7756   // Start with a 0 size frame.
7757   *size = 0;
7758
7759   cpi->frame_flags = *frame_flags;
7760
7761 #if !CONFIG_REALTIME_ONLY
7762   if ((oxcf->pass == 2) && !cpi->use_svc) {
7763     vp9_rc_get_second_pass_params(cpi);
7764   } else if (oxcf->pass == 1) {
7765     set_frame_size(cpi);
7766   }
7767 #endif  // !CONFIG_REALTIME_ONLY
7768
7769   if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
7770       cpi->level_constraint.fail_flag == 0)
7771     level_rc_framerate(cpi, arf_src_index);
7772
7773   if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
7774     for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
7775   }
7776
7777   if (cpi->kmeans_data_arr_alloc == 0) {
7778     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7779     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7780 #if CONFIG_MULTITHREAD
7781     pthread_mutex_init(&cpi->kmeans_mutex, NULL);
7782 #endif
7783     CHECK_MEM_ERROR(
7784         cm, cpi->kmeans_data_arr,
7785         vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
7786     cpi->kmeans_data_stride = mi_cols;
7787     cpi->kmeans_data_arr_alloc = 1;
7788   }
7789
7790 #if CONFIG_NON_GREEDY_MV
7791   {
7792     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7793     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7794     Status status = vp9_alloc_motion_field_info(
7795         &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
7796     if (status == STATUS_FAILED) {
7797       vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
7798                          "vp9_alloc_motion_field_info failed");
7799     }
7800   }
7801 #endif  // CONFIG_NON_GREEDY_MV
7802
7803   if (gf_group_index == 1 &&
7804       cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
7805       cpi->sf.enable_tpl_model) {
7806     init_tpl_buffer(cpi);
7807     vp9_estimate_qp_gop(cpi);
7808     setup_tpl_stats(cpi);
7809   }
7810
7811 #if CONFIG_BITSTREAM_DEBUG
7812   assert(cpi->oxcf.max_threads == 0 &&
7813          "bitstream debug tool does not support multithreading");
7814   bitstream_queue_record_write();
7815 #endif
7816 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
7817   bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
7818 #endif
7819
7820   cpi->td.mb.fp_src_pred = 0;
7821 #if CONFIG_REALTIME_ONLY
7822   (void)encode_frame_result;
7823   if (cpi->use_svc) {
7824     SvcEncode(cpi, size, dest, frame_flags);
7825   } else {
7826     // One pass encode
7827     Pass0Encode(cpi, size, dest, frame_flags);
7828   }
7829 #else  // !CONFIG_REALTIME_ONLY
7830   if (oxcf->pass == 1 && !cpi->use_svc) {
7831     const int lossless = is_lossless_requested(oxcf);
7832 #if CONFIG_VP9_HIGHBITDEPTH
7833     if (cpi->oxcf.use_highbitdepth)
7834       cpi->td.mb.fwd_txfm4x4 =
7835           lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
7836     else
7837       cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7838     cpi->td.mb.highbd_inv_txfm_add =
7839         lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
7840 #else
7841     cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7842 #endif  // CONFIG_VP9_HIGHBITDEPTH
7843     cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
7844     vp9_first_pass(cpi, source);
7845   } else if (oxcf->pass == 2 && !cpi->use_svc) {
7846     Pass2Encode(cpi, size, dest, frame_flags, encode_frame_result);
7847     vp9_twopass_postencode_update(cpi);
7848   } else if (cpi->use_svc) {
7849     SvcEncode(cpi, size, dest, frame_flags);
7850   } else {
7851     // One pass encode
7852     Pass0Encode(cpi, size, dest, frame_flags);
7853   }
7854 #endif  // CONFIG_REALTIME_ONLY
7855
7856   if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
7857
7858   if (cm->refresh_frame_context)
7859     cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
7860
7861   // No frame encoded, or frame was dropped, release scaled references.
7862   if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
7863     release_scaled_references(cpi);
7864   }
7865
7866   if (*size > 0) {
7867     cpi->droppable = !frame_is_reference(cpi);
7868   }
7869
7870   // Save layer specific state.
7871   if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 ||
7872                                     cpi->svc.number_spatial_layers > 1) &&
7873                                    oxcf->pass == 2)) {
7874     vp9_save_layer_context(cpi);
7875   }
7876
7877   vpx_usec_timer_mark(&cmptimer);
7878   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
7879
7880   if (cpi->keep_level_stats && oxcf->pass != 1)
7881     update_level_info(cpi, size, arf_src_index);
7882
7883 #if CONFIG_INTERNAL_STATS
7884
7885   if (oxcf->pass != 1) {
7886     double samples = 0.0;
7887     cpi->bytes += (int)(*size);
7888
7889     if (cm->show_frame) {
7890       uint32_t bit_depth = 8;
7891       uint32_t in_bit_depth = 8;
7892       cpi->count++;
7893 #if CONFIG_VP9_HIGHBITDEPTH
7894       if (cm->use_highbitdepth) {
7895         in_bit_depth = cpi->oxcf.input_bit_depth;
7896         bit_depth = cm->bit_depth;
7897       }
7898 #endif
7899
7900       if (cpi->b_calculate_psnr) {
7901         YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
7902         YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
7903         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
7904         PSNR_STATS psnr;
7905 #if CONFIG_VP9_HIGHBITDEPTH
7906         vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
7907                              in_bit_depth);
7908 #else
7909         vpx_calc_psnr(orig, recon, &psnr);
7910 #endif  // CONFIG_VP9_HIGHBITDEPTH
7911
7912         adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
7913                           psnr.psnr[0], &cpi->psnr);
7914         cpi->total_sq_error += psnr.sse[0];
7915         cpi->total_samples += psnr.samples[0];
7916         samples = psnr.samples[0];
7917
7918         {
7919           PSNR_STATS psnr2;
7920           double frame_ssim2 = 0, weight = 0;
7921 #if CONFIG_VP9_POSTPROC
7922           if (vpx_alloc_frame_buffer(
7923                   pp, recon->y_crop_width, recon->y_crop_height,
7924                   cm->subsampling_x, cm->subsampling_y,
7925 #if CONFIG_VP9_HIGHBITDEPTH
7926                   cm->use_highbitdepth,
7927 #endif
7928                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) {
7929             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
7930                                "Failed to allocate post processing buffer");
7931           }
7932           {
7933             vp9_ppflags_t ppflags;
7934             ppflags.post_proc_flag = VP9D_DEBLOCK;
7935             ppflags.deblocking_level = 0;  // not used in vp9_post_proc_frame()
7936             ppflags.noise_level = 0;       // not used in vp9_post_proc_frame()
7937             vp9_post_proc_frame(cm, pp, &ppflags,
7938                                 cpi->un_scaled_source->y_width);
7939           }
7940 #endif
7941           vpx_clear_system_state();
7942
7943 #if CONFIG_VP9_HIGHBITDEPTH
7944           vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
7945                                cpi->oxcf.input_bit_depth);
7946 #else
7947           vpx_calc_psnr(orig, pp, &psnr2);
7948 #endif  // CONFIG_VP9_HIGHBITDEPTH
7949
7950           cpi->totalp_sq_error += psnr2.sse[0];
7951           cpi->totalp_samples += psnr2.samples[0];
7952           adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
7953                             psnr2.psnr[0], &cpi->psnrp);
7954
7955 #if CONFIG_VP9_HIGHBITDEPTH
7956           if (cm->use_highbitdepth) {
7957             frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth,
7958                                                in_bit_depth);
7959           } else {
7960             frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7961           }
7962 #else
7963           frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7964 #endif  // CONFIG_VP9_HIGHBITDEPTH
7965
7966           cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
7967           cpi->summed_quality += frame_ssim2 * weight;
7968           cpi->summed_weights += weight;
7969
7970 #if CONFIG_VP9_HIGHBITDEPTH
7971           if (cm->use_highbitdepth) {
7972             frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth,
7973                                                in_bit_depth);
7974           } else {
7975             frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7976           }
7977 #else
7978           frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7979 #endif  // CONFIG_VP9_HIGHBITDEPTH
7980
7981           cpi->summedp_quality += frame_ssim2 * weight;
7982           cpi->summedp_weights += weight;
7983 #if 0
7984           if (cm->show_frame) {
7985             FILE *f = fopen("q_used.stt", "a");
7986             fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
7987                     cpi->common.current_video_frame, psnr2.psnr[1],
7988                     psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
7989             fclose(f);
7990           }
7991 #endif
7992         }
7993       }
7994       if (cpi->b_calculate_blockiness) {
7995 #if CONFIG_VP9_HIGHBITDEPTH
7996         if (!cm->use_highbitdepth)
7997 #endif
7998         {
7999           double frame_blockiness = vp9_get_blockiness(
8000               cpi->Source->y_buffer, cpi->Source->y_stride,
8001               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
8002               cpi->Source->y_width, cpi->Source->y_height);
8003           cpi->worst_blockiness =
8004               VPXMAX(cpi->worst_blockiness, frame_blockiness);
8005           cpi->total_blockiness += frame_blockiness;
8006         }
8007       }
8008
8009       if (cpi->b_calculate_consistency) {
8010 #if CONFIG_VP9_HIGHBITDEPTH
8011         if (!cm->use_highbitdepth)
8012 #endif
8013         {
8014           double this_inconsistency = vpx_get_ssim_metrics(
8015               cpi->Source->y_buffer, cpi->Source->y_stride,
8016               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
8017               cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
8018               &cpi->metrics, 1);
8019
8020           const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
8021           double consistency =
8022               vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency);
8023           if (consistency > 0.0)
8024             cpi->worst_consistency =
8025                 VPXMIN(cpi->worst_consistency, consistency);
8026           cpi->total_inconsistency += this_inconsistency;
8027         }
8028       }
8029
8030       {
8031         double y, u, v, frame_all;
8032         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
8033                                       &v, bit_depth, in_bit_depth);
8034         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
8035       }
8036       {
8037         double y, u, v, frame_all;
8038         frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
8039                                 bit_depth, in_bit_depth);
8040         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
8041       }
8042     }
8043   }
8044
8045 #endif
8046
8047   if (is_one_pass_cbr_svc(cpi)) {
8048     if (cm->show_frame) {
8049       ++cpi->svc.spatial_layer_to_encode;
8050       if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
8051         cpi->svc.spatial_layer_to_encode = 0;
8052     }
8053   }
8054
8055   vpx_clear_system_state();
8056   return 0;
8057 }
8058
8059 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
8060                               vp9_ppflags_t *flags) {
8061   VP9_COMMON *cm = &cpi->common;
8062 #if !CONFIG_VP9_POSTPROC
8063   (void)flags;
8064 #endif
8065
8066   if (!cm->show_frame) {
8067     return -1;
8068   } else {
8069     int ret;
8070 #if CONFIG_VP9_POSTPROC
8071     ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
8072 #else
8073     if (cm->frame_to_show) {
8074       *dest = *cm->frame_to_show;
8075       dest->y_width = cm->width;
8076       dest->y_height = cm->height;
8077       dest->uv_width = cm->width >> cm->subsampling_x;
8078       dest->uv_height = cm->height >> cm->subsampling_y;
8079       ret = 0;
8080     } else {
8081       ret = -1;
8082     }
8083 #endif  // !CONFIG_VP9_POSTPROC
8084     vpx_clear_system_state();
8085     return ret;
8086   }
8087 }
8088
8089 int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
8090                           VPX_SCALING vert_mode) {
8091   VP9_COMMON *cm = &cpi->common;
8092   int hr = 0, hs = 0, vr = 0, vs = 0;
8093
8094   if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
8095
8096   Scale2Ratio(horiz_mode, &hr, &hs);
8097   Scale2Ratio(vert_mode, &vr, &vs);
8098
8099   // always go to the next whole number
8100   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
8101   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
8102   if (cm->current_video_frame) {
8103     assert(cm->width <= cpi->initial_width);
8104     assert(cm->height <= cpi->initial_height);
8105   }
8106
8107   update_frame_size(cpi);
8108
8109   return 0;
8110 }
8111
8112 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
8113                          unsigned int height) {
8114   VP9_COMMON *cm = &cpi->common;
8115 #if CONFIG_VP9_HIGHBITDEPTH
8116   update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
8117 #else
8118   update_initial_width(cpi, 0, 1, 1);
8119 #endif  // CONFIG_VP9_HIGHBITDEPTH
8120
8121 #if CONFIG_VP9_TEMPORAL_DENOISING
8122   setup_denoiser_buffer(cpi);
8123 #endif
8124   alloc_raw_frame_buffers(cpi);
8125   if (width) {
8126     cm->width = width;
8127     if (cm->width > cpi->initial_width) {
8128       cm->width = cpi->initial_width;
8129       printf("Warning: Desired width too large, changed to %d\n", cm->width);
8130     }
8131   }
8132
8133   if (height) {
8134     cm->height = height;
8135     if (cm->height > cpi->initial_height) {
8136       cm->height = cpi->initial_height;
8137       printf("Warning: Desired height too large, changed to %d\n", cm->height);
8138     }
8139   }
8140   assert(cm->width <= cpi->initial_width);
8141   assert(cm->height <= cpi->initial_height);
8142
8143   update_frame_size(cpi);
8144
8145   return 0;
8146 }
8147
8148 void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
8149   cpi->use_svc = use_svc;
8150   return;
8151 }
8152
8153 int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
8154
8155 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
8156   if (flags &
8157       (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) {
8158     int ref = 7;
8159
8160     if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG;
8161
8162     if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG;
8163
8164     if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG;
8165
8166     vp9_use_as_reference(cpi, ref);
8167   }
8168
8169   if (flags &
8170       (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
8171        VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) {
8172     int upd = 7;
8173
8174     if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG;
8175
8176     if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
8177
8178     if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG;
8179
8180     vp9_update_reference(cpi, upd);
8181   }
8182
8183   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
8184     vp9_update_entropy(cpi, 0);
8185   }
8186 }
8187
8188 void vp9_set_row_mt(VP9_COMP *cpi) {
8189   // Enable row based multi-threading for supported modes of encoding
8190   cpi->row_mt = 0;
8191   if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
8192        cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
8193       cpi->oxcf.row_mt && !cpi->use_svc)
8194     cpi->row_mt = 1;
8195
8196   if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
8197       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
8198       !cpi->use_svc)
8199     cpi->row_mt = 1;
8200
8201   // In realtime mode, enable row based multi-threading for all the speed levels
8202   // where non-rd path is used.
8203   if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) {
8204     cpi->row_mt = 1;
8205   }
8206
8207   if (cpi->row_mt)
8208     cpi->row_mt_bit_exact = 1;
8209   else
8210     cpi->row_mt_bit_exact = 0;
8211 }