granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encoder.c

   1 /*
   2  * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <limits.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_config.h"
  18 #include "./vpx_dsp_rtcd.h"
  19 #include "./vpx_scale_rtcd.h"
  20 #include "vpx_dsp/psnr.h"
  21 #include "vpx_dsp/vpx_dsp_common.h"
  22 #include "vpx_dsp/vpx_filter.h"
  23 #if CONFIG_INTERNAL_STATS
  24 #include "vpx_dsp/ssim.h"
  25 #endif
  26 #include "vpx_ports/mem.h"
  27 #include "vpx_ports/system_state.h"
  28 #include "vpx_ports/vpx_timer.h"
  29 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  30 #include "vpx_util/vpx_debug_util.h"
  31 #endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  32
  33 #include "vp9/common/vp9_alloccommon.h"
  34 #include "vp9/common/vp9_filter.h"
  35 #include "vp9/common/vp9_idct.h"
  36 #if CONFIG_NON_GREEDY_MV
  37 #include "vp9/common/vp9_mvref_common.h"
  38 #endif
  39 #if CONFIG_VP9_POSTPROC
  40 #include "vp9/common/vp9_postproc.h"
  41 #endif
  42 #include "vp9/common/vp9_reconinter.h"
  43 #include "vp9/common/vp9_reconintra.h"
  44 #include "vp9/common/vp9_tile_common.h"
  45 #include "vp9/common/vp9_scan.h"
  46
  47 #if !CONFIG_REALTIME_ONLY
  48 #include "vp9/encoder/vp9_alt_ref_aq.h"
  49 #include "vp9/encoder/vp9_aq_360.h"
  50 #include "vp9/encoder/vp9_aq_complexity.h"
  51 #endif
  52 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  53 #if !CONFIG_REALTIME_ONLY
  54 #include "vp9/encoder/vp9_aq_variance.h"
  55 #endif
  56 #include "vp9/encoder/vp9_bitstream.h"
  57 #if CONFIG_INTERNAL_STATS
  58 #include "vp9/encoder/vp9_blockiness.h"
  59 #endif
  60 #include "vp9/encoder/vp9_context_tree.h"
  61 #include "vp9/encoder/vp9_encodeframe.h"
  62 #include "vp9/encoder/vp9_encodemb.h"
  63 #include "vp9/encoder/vp9_encodemv.h"
  64 #include "vp9/encoder/vp9_encoder.h"
  65 #include "vp9/encoder/vp9_ethread.h"
  66 #include "vp9/encoder/vp9_extend.h"
  67 #include "vp9/encoder/vp9_firstpass.h"
  68 #include "vp9/encoder/vp9_mbgraph.h"
  69 #if CONFIG_NON_GREEDY_MV
  70 #include "vp9/encoder/vp9_mcomp.h"
  71 #endif
  72 #include "vp9/encoder/vp9_multi_thread.h"
  73 #include "vp9/encoder/vp9_noise_estimate.h"
  74 #include "vp9/encoder/vp9_picklpf.h"
  75 #include "vp9/encoder/vp9_ratectrl.h"
  76 #include "vp9/encoder/vp9_rd.h"
  77 #include "vp9/encoder/vp9_resize.h"
  78 #include "vp9/encoder/vp9_segmentation.h"
  79 #include "vp9/encoder/vp9_skin_detection.h"
  80 #include "vp9/encoder/vp9_speed_features.h"
  81 #include "vp9/encoder/vp9_svc_layercontext.h"
  82 #include "vp9/encoder/vp9_temporal_filter.h"
  83 #include "vp9/vp9_cx_iface.h"
  84
  85 #define AM_SEGMENT_ID_INACTIVE 7
  86 #define AM_SEGMENT_ID_ACTIVE 0
  87
  88 // Whether to use high precision mv for altref computation.
  89 #define ALTREF_HIGH_PRECISION_MV 1
  90
  91 // Q threshold for high precision mv. Choose a very high value for now so that
  92 // HIGH_PRECISION is always chosen.
  93 #define HIGH_PRECISION_MV_QTHRESH 200
  94
  95 #define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
  96 #define FRAME_RATE_FACTOR 8
  97
  98 #ifdef OUTPUT_YUV_DENOISED
  99 FILE *yuv_denoised_file = NULL;
 100 #endif
 101 #ifdef OUTPUT_YUV_SKINMAP
 102 static FILE *yuv_skinmap_file = NULL;
 103 #endif
 104 #ifdef OUTPUT_YUV_REC
 105 FILE *yuv_rec_file;
 106 #endif
 107 #ifdef OUTPUT_YUV_SVC_SRC
 108 FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
 109 #endif
 110
 111 #if 0
 112 FILE *framepsnr;
 113 FILE *kf_list;
 114 FILE *keyfile;
 115 #endif
 116
 117 #ifdef ENABLE_KF_DENOISE
 118 // Test condition for spatial denoise of source.
 119 static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 120   VP9_COMMON *const cm = &cpi->common;
 121   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 122
 123   return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) &&
 124          frame_is_intra_only(cm);
 125 }
 126 #endif
 127
 128 #if CONFIG_VP9_HIGHBITDEPTH
 129 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 130                          TX_SIZE tx_size);
 131 #endif
 132 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 133                   TX_SIZE tx_size);
 134
 135 #if !CONFIG_REALTIME_ONLY
 136 // compute adaptive threshold for skip recoding
 137 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
 138   const VP9_COMMON *const cm = &cpi->common;
 139   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 140   const int frame_size = (cm->width * cm->height) >> 10;
 141   const int bitrate = (int)(oxcf->target_bandwidth >> 10);
 142   const int qindex_factor = cm->base_qindex + (MAXQ >> 1);
 143
 144   // This equation makes the threshold adaptive to frame size.
 145   // Coding gain obtained by recoding comes from alternate frames of large
 146   // content change. We skip recoding if the difference of previous and current
 147   // frame context probability model is less than a certain threshold.
 148   // The first component is the most critical part to guarantee adaptivity.
 149   // Other parameters are estimated based on normal setting of hd resolution
 150   // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50
 151   const int thresh =
 152       ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) *
 153        qindex_factor) >>
 154       9;
 155
 156   return thresh;
 157 }
 158
 159 // compute the total cost difference between current
 160 // and previous frame context prob model.
 161 static int compute_context_model_diff(const VP9_COMMON *const cm) {
 162   const FRAME_CONTEXT *const pre_fc =
 163       &cm->frame_contexts[cm->frame_context_idx];
 164   const FRAME_CONTEXT *const cur_fc = cm->fc;
 165   const FRAME_COUNTS *counts = &cm->counts;
 166   vpx_prob pre_last_prob, cur_last_prob;
 167   int diff = 0;
 168   int i, j, k, l, m, n;
 169
 170   // y_mode_prob
 171   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
 172     for (j = 0; j < INTRA_MODES - 1; ++j) {
 173       diff += (int)counts->y_mode[i][j] *
 174               (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]);
 175     }
 176     pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2];
 177     cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2];
 178
 179     diff += (int)counts->y_mode[i][INTRA_MODES - 1] *
 180             (pre_last_prob - cur_last_prob);
 181   }
 182
 183   // uv_mode_prob
 184   for (i = 0; i < INTRA_MODES; ++i) {
 185     for (j = 0; j < INTRA_MODES - 1; ++j) {
 186       diff += (int)counts->uv_mode[i][j] *
 187               (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]);
 188     }
 189     pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2];
 190     cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2];
 191
 192     diff += (int)counts->uv_mode[i][INTRA_MODES - 1] *
 193             (pre_last_prob - cur_last_prob);
 194   }
 195
 196   // partition_prob
 197   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
 198     for (j = 0; j < PARTITION_TYPES - 1; ++j) {
 199       diff += (int)counts->partition[i][j] *
 200               (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]);
 201     }
 202     pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2];
 203     cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2];
 204
 205     diff += (int)counts->partition[i][PARTITION_TYPES - 1] *
 206             (pre_last_prob - cur_last_prob);
 207   }
 208
 209   // coef_probs
 210   for (i = 0; i < TX_SIZES; ++i) {
 211     for (j = 0; j < PLANE_TYPES; ++j) {
 212       for (k = 0; k < REF_TYPES; ++k) {
 213         for (l = 0; l < COEF_BANDS; ++l) {
 214           for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) {
 215             for (n = 0; n < UNCONSTRAINED_NODES; ++n) {
 216               diff += (int)counts->coef[i][j][k][l][m][n] *
 217                       (pre_fc->coef_probs[i][j][k][l][m][n] -
 218                        cur_fc->coef_probs[i][j][k][l][m][n]);
 219             }
 220
 221             pre_last_prob =
 222                 MAX_PROB -
 223                 pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 224             cur_last_prob =
 225                 MAX_PROB -
 226                 cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 227
 228             diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] *
 229                     (pre_last_prob - cur_last_prob);
 230           }
 231         }
 232       }
 233     }
 234   }
 235
 236   // switchable_interp_prob
 237   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
 238     for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) {
 239       diff += (int)counts->switchable_interp[i][j] *
 240               (pre_fc->switchable_interp_prob[i][j] -
 241                cur_fc->switchable_interp_prob[i][j]);
 242     }
 243     pre_last_prob =
 244         MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 245     cur_last_prob =
 246         MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 247
 248     diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] *
 249             (pre_last_prob - cur_last_prob);
 250   }
 251
 252   // inter_mode_probs
 253   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
 254     for (j = 0; j < INTER_MODES - 1; ++j) {
 255       diff += (int)counts->inter_mode[i][j] *
 256               (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]);
 257     }
 258     pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2];
 259     cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2];
 260
 261     diff += (int)counts->inter_mode[i][INTER_MODES - 1] *
 262             (pre_last_prob - cur_last_prob);
 263   }
 264
 265   // intra_inter_prob
 266   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
 267     diff += (int)counts->intra_inter[i][0] *
 268             (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]);
 269
 270     pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i];
 271     cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i];
 272
 273     diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob);
 274   }
 275
 276   // comp_inter_prob
 277   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
 278     diff += (int)counts->comp_inter[i][0] *
 279             (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]);
 280
 281     pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i];
 282     cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i];
 283
 284     diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob);
 285   }
 286
 287   // single_ref_prob
 288   for (i = 0; i < REF_CONTEXTS; ++i) {
 289     for (j = 0; j < 2; ++j) {
 290       diff += (int)counts->single_ref[i][j][0] *
 291               (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]);
 292
 293       pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j];
 294       cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j];
 295
 296       diff +=
 297           (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob);
 298     }
 299   }
 300
 301   // comp_ref_prob
 302   for (i = 0; i < REF_CONTEXTS; ++i) {
 303     diff += (int)counts->comp_ref[i][0] *
 304             (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]);
 305
 306     pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i];
 307     cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i];
 308
 309     diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob);
 310   }
 311
 312   // tx_probs
 313   for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
 314     // p32x32
 315     for (j = 0; j < TX_SIZES - 1; ++j) {
 316       diff += (int)counts->tx.p32x32[i][j] *
 317               (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]);
 318     }
 319     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 320     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 321
 322     diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] *
 323             (pre_last_prob - cur_last_prob);
 324
 325     // p16x16
 326     for (j = 0; j < TX_SIZES - 2; ++j) {
 327       diff += (int)counts->tx.p16x16[i][j] *
 328               (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]);
 329     }
 330     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 331     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 332
 333     diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] *
 334             (pre_last_prob - cur_last_prob);
 335
 336     // p8x8
 337     for (j = 0; j < TX_SIZES - 3; ++j) {
 338       diff += (int)counts->tx.p8x8[i][j] *
 339               (pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]);
 340     }
 341     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 342     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 343
 344     diff +=
 345         (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob);
 346   }
 347
 348   // skip_probs
 349   for (i = 0; i < SKIP_CONTEXTS; ++i) {
 350     diff += (int)counts->skip[i][0] *
 351             (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]);
 352
 353     pre_last_prob = MAX_PROB - pre_fc->skip_probs[i];
 354     cur_last_prob = MAX_PROB - cur_fc->skip_probs[i];
 355
 356     diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob);
 357   }
 358
 359   // mv
 360   for (i = 0; i < MV_JOINTS - 1; ++i) {
 361     diff += (int)counts->mv.joints[i] *
 362             (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]);
 363   }
 364   pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2];
 365   cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2];
 366
 367   diff +=
 368       (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob);
 369
 370   for (i = 0; i < 2; ++i) {
 371     const nmv_component_counts *nmv_count = &counts->mv.comps[i];
 372     const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i];
 373     const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i];
 374
 375     // sign
 376     diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign);
 377
 378     pre_last_prob = MAX_PROB - pre_nmv_prob->sign;
 379     cur_last_prob = MAX_PROB - cur_nmv_prob->sign;
 380
 381     diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob);
 382
 383     // classes
 384     for (j = 0; j < MV_CLASSES - 1; ++j) {
 385       diff += (int)nmv_count->classes[j] *
 386               (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]);
 387     }
 388     pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2];
 389     cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2];
 390
 391     diff += (int)nmv_count->classes[MV_CLASSES - 1] *
 392             (pre_last_prob - cur_last_prob);
 393
 394     // class0
 395     for (j = 0; j < CLASS0_SIZE - 1; ++j) {
 396       diff += (int)nmv_count->class0[j] *
 397               (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]);
 398     }
 399     pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2];
 400     cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2];
 401
 402     diff += (int)nmv_count->class0[CLASS0_SIZE - 1] *
 403             (pre_last_prob - cur_last_prob);
 404
 405     // bits
 406     for (j = 0; j < MV_OFFSET_BITS; ++j) {
 407       diff += (int)nmv_count->bits[j][0] *
 408               (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]);
 409
 410       pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j];
 411       cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j];
 412
 413       diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob);
 414     }
 415
 416     // class0_fp
 417     for (j = 0; j < CLASS0_SIZE; ++j) {
 418       for (k = 0; k < MV_FP_SIZE - 1; ++k) {
 419         diff += (int)nmv_count->class0_fp[j][k] *
 420                 (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]);
 421       }
 422       pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 423       cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 424
 425       diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] *
 426               (pre_last_prob - cur_last_prob);
 427     }
 428
 429     // fp
 430     for (j = 0; j < MV_FP_SIZE - 1; ++j) {
 431       diff +=
 432           (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]);
 433     }
 434     pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2];
 435     cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2];
 436
 437     diff +=
 438         (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob);
 439
 440     // class0_hp
 441     diff += (int)nmv_count->class0_hp[0] *
 442             (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp);
 443
 444     pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp;
 445     cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp;
 446
 447     diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob);
 448
 449     // hp
 450     diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp);
 451
 452     pre_last_prob = MAX_PROB - pre_nmv_prob->hp;
 453     cur_last_prob = MAX_PROB - cur_nmv_prob->hp;
 454
 455     diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob);
 456   }
 457
 458   return -diff;
 459 }
 460 #endif  // !CONFIG_REALTIME_ONLY
 461
 462 // Test for whether to calculate metrics for the frame.
 463 static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
 464   const VP9_COMMON *const cm = &cpi->common;
 465   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 466
 467   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
 468 }
 469
 470 /* clang-format off */
 471 const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
 472   //         sample rate    size   breadth  bitrate  cpb
 473   { LEVEL_1,   829440,      36864,    512,   200,    400,    2, 1,  4,  8 },
 474   { LEVEL_1_1, 2764800,     73728,    768,   800,    1000,   2, 1,  4,  8 },
 475   { LEVEL_2,   4608000,     122880,   960,   1800,   1500,   2, 1,  4,  8 },
 476   { LEVEL_2_1, 9216000,     245760,   1344,  3600,   2800,   2, 2,  4,  8 },
 477   { LEVEL_3,   20736000,    552960,   2048,  7200,   6000,   2, 4,  4,  8 },
 478   { LEVEL_3_1, 36864000,    983040,   2752,  12000,  10000,  2, 4,  4,  8 },
 479   { LEVEL_4,   83558400,    2228224,  4160,  18000,  16000,  4, 4,  4,  8 },
 480   { LEVEL_4_1, 160432128,   2228224,  4160,  30000,  18000,  4, 4,  5,  6 },
 481   { LEVEL_5,   311951360,   8912896,  8384,  60000,  36000,  6, 8,  6,  4 },
 482   { LEVEL_5_1, 588251136,   8912896,  8384,  120000, 46000,  8, 8,  10, 4 },
 483   // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
 484   // they are finalized (currently tentative).
 485   { LEVEL_5_2, 1176502272,  8912896,  8384,  180000, 90000,  8, 8,  10, 4 },
 486   { LEVEL_6,   1176502272,  35651584, 16832, 180000, 90000,  8, 16, 10, 4 },
 487   { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
 488   { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
 489 };
 490 /* clang-format on */
 491
 492 static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
 493   "The average bit-rate is too high.",
 494   "The picture size is too large.",
 495   "The picture width/height is too large.",
 496   "The luma sample rate is too large.",
 497   "The CPB size is too large.",
 498   "The compression ratio is too small",
 499   "Too many column tiles are used.",
 500   "The alt-ref distance is too small.",
 501   "Too many reference buffers are used."
 502 };
 503
 504 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
 505   switch (mode) {
 506     case NORMAL:
 507       *hr = 1;
 508       *hs = 1;
 509       break;
 510     case FOURFIVE:
 511       *hr = 4;
 512       *hs = 5;
 513       break;
 514     case THREEFIVE:
 515       *hr = 3;
 516       *hs = 5;
 517       break;
 518     default:
 519       assert(mode == ONETWO);
 520       *hr = 1;
 521       *hs = 2;
 522       break;
 523   }
 524 }
 525
 526 // Mark all inactive blocks as active. Other segmentation features may be set
 527 // so memset cannot be used, instead only inactive blocks should be reset.
 528 static void suppress_active_map(VP9_COMP *cpi) {
 529   unsigned char *const seg_map = cpi->segmentation_map;
 530
 531   if (cpi->active_map.enabled || cpi->active_map.update) {
 532     const int rows = cpi->common.mi_rows;
 533     const int cols = cpi->common.mi_cols;
 534     int i;
 535
 536     for (i = 0; i < rows * cols; ++i)
 537       if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
 538         seg_map[i] = AM_SEGMENT_ID_ACTIVE;
 539   }
 540 }
 541
 542 static void apply_active_map(VP9_COMP *cpi) {
 543   struct segmentation *const seg = &cpi->common.seg;
 544   unsigned char *const seg_map = cpi->segmentation_map;
 545   const unsigned char *const active_map = cpi->active_map.map;
 546   int i;
 547
 548   assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
 549
 550   if (frame_is_intra_only(&cpi->common)) {
 551     cpi->active_map.enabled = 0;
 552     cpi->active_map.update = 1;
 553   }
 554
 555   if (cpi->active_map.update) {
 556     if (cpi->active_map.enabled) {
 557       for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
 558         if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
 559       vp9_enable_segmentation(seg);
 560       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 561       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 562       // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
 563       // filter level being zero regardless of the value of seg->abs_delta.
 564       vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
 565                       -MAX_LOOP_FILTER);
 566     } else {
 567       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 568       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 569       if (seg->enabled) {
 570         seg->update_data = 1;
 571         seg->update_map = 1;
 572       }
 573     }
 574     cpi->active_map.update = 0;
 575   }
 576 }
 577
 578 static void apply_roi_map(VP9_COMP *cpi) {
 579   VP9_COMMON *cm = &cpi->common;
 580   struct segmentation *const seg = &cm->seg;
 581   vpx_roi_map_t *roi = &cpi->roi;
 582   const int *delta_q = roi->delta_q;
 583   const int *delta_lf = roi->delta_lf;
 584   const int *skip = roi->skip;
 585   int ref_frame[8];
 586   int internal_delta_q[MAX_SEGMENTS];
 587   int i;
 588   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
 589                                     VP9_ALT_FLAG };
 590
 591   // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
 592   // realtime mode.
 593   if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
 594   if (!roi->enabled) return;
 595
 596   memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
 597
 598   vp9_enable_segmentation(seg);
 599   vp9_clearall_segfeatures(seg);
 600   // Select delta coding method;
 601   seg->abs_delta = SEGMENT_DELTADATA;
 602
 603   memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
 604
 605   for (i = 0; i < MAX_SEGMENTS; ++i) {
 606     // Translate the external delta q values to internal values.
 607     internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
 608     if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
 609     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
 610     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
 611     if (internal_delta_q[i] != 0) {
 612       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
 613       vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
 614     }
 615     if (delta_lf[i] != 0) {
 616       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
 617       vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
 618     }
 619     if (skip[i] != 0) {
 620       vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
 621       vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
 622     }
 623     if (ref_frame[i] >= 0) {
 624       int valid_ref = 1;
 625       // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
 626       if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
 627         valid_ref = 0;
 628       // If GOLDEN is selected, make sure it's set as reference.
 629       if (ref_frame[i] == GOLDEN_FRAME &&
 630           !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
 631         valid_ref = 0;
 632       }
 633       // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
 634       // same reference.
 635       if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
 636         ref_frame[i] = LAST_FRAME;
 637       if (valid_ref) {
 638         vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
 639         vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
 640       }
 641     }
 642   }
 643   roi->enabled = 1;
 644 }
 645
 646 static void init_level_info(Vp9LevelInfo *level_info) {
 647   Vp9LevelStats *const level_stats = &level_info->level_stats;
 648   Vp9LevelSpec *const level_spec = &level_info->level_spec;
 649
 650   memset(level_stats, 0, sizeof(*level_stats));
 651   memset(level_spec, 0, sizeof(*level_spec));
 652   level_spec->level = LEVEL_UNKNOWN;
 653   level_spec->min_altref_distance = INT_MAX;
 654 }
 655
 656 static int check_seg_range(int seg_data[8], int range) {
 657   return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
 658            abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
 659            abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
 660            abs(seg_data[6]) > range || abs(seg_data[7]) > range);
 661 }
 662
 663 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
 664   int i;
 665   const Vp9LevelSpec *this_level;
 666
 667   vpx_clear_system_state();
 668
 669   for (i = 0; i < VP9_LEVELS; ++i) {
 670     this_level = &vp9_level_defs[i];
 671     if ((double)level_spec->max_luma_sample_rate >
 672             (double)this_level->max_luma_sample_rate *
 673                 (1 + SAMPLE_RATE_GRACE_P) ||
 674         level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
 675         level_spec->max_luma_picture_breadth >
 676             this_level->max_luma_picture_breadth ||
 677         level_spec->average_bitrate > this_level->average_bitrate ||
 678         level_spec->max_cpb_size > this_level->max_cpb_size ||
 679         level_spec->compression_ratio < this_level->compression_ratio ||
 680         level_spec->max_col_tiles > this_level->max_col_tiles ||
 681         level_spec->min_altref_distance < this_level->min_altref_distance ||
 682         level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
 683       continue;
 684     break;
 685   }
 686   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 687 }
 688
 689 int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
 690                     unsigned int cols, int delta_q[8], int delta_lf[8],
 691                     int skip[8], int ref_frame[8]) {
 692   VP9_COMMON *cm = &cpi->common;
 693   vpx_roi_map_t *roi = &cpi->roi;
 694   const int range = 63;
 695   const int ref_frame_range = 3;  // Alt-ref
 696   const int skip_range = 1;
 697   const int frame_rows = cpi->common.mi_rows;
 698   const int frame_cols = cpi->common.mi_cols;
 699
 700   // Check number of rows and columns match
 701   if (frame_rows != (int)rows || frame_cols != (int)cols) {
 702     return -1;
 703   }
 704
 705   if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
 706       !check_seg_range(ref_frame, ref_frame_range) ||
 707       !check_seg_range(skip, skip_range))
 708     return -1;
 709
 710   // Also disable segmentation if no deltas are specified.
 711   if (!map ||
 712       (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
 713          delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
 714          delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
 715          delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
 716          skip[5] | skip[6] | skip[7]) &&
 717        (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
 718         ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
 719         ref_frame[6] == -1 && ref_frame[7] == -1))) {
 720     vp9_disable_segmentation(&cm->seg);
 721     cpi->roi.enabled = 0;
 722     return 0;
 723   }
 724
 725   if (roi->roi_map) {
 726     vpx_free(roi->roi_map);
 727     roi->roi_map = NULL;
 728   }
 729   CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
 730
 731   // Copy to ROI sturcture in the compressor.
 732   memcpy(roi->roi_map, map, rows * cols);
 733   memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
 734   memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
 735   memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
 736   memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
 737   roi->enabled = 1;
 738   roi->rows = rows;
 739   roi->cols = cols;
 740
 741   return 0;
 742 }
 743
 744 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 745                        int cols) {
 746   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
 747     unsigned char *const active_map_8x8 = cpi->active_map.map;
 748     const int mi_rows = cpi->common.mi_rows;
 749     const int mi_cols = cpi->common.mi_cols;
 750     cpi->active_map.update = 1;
 751     if (new_map_16x16) {
 752       int r, c;
 753       for (r = 0; r < mi_rows; ++r) {
 754         for (c = 0; c < mi_cols; ++c) {
 755           active_map_8x8[r * mi_cols + c] =
 756               new_map_16x16[(r >> 1) * cols + (c >> 1)]
 757                   ? AM_SEGMENT_ID_ACTIVE
 758                   : AM_SEGMENT_ID_INACTIVE;
 759         }
 760       }
 761       cpi->active_map.enabled = 1;
 762     } else {
 763       cpi->active_map.enabled = 0;
 764     }
 765     return 0;
 766   } else {
 767     return -1;
 768   }
 769 }
 770
 771 int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 772                        int cols) {
 773   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
 774       new_map_16x16) {
 775     unsigned char *const seg_map_8x8 = cpi->segmentation_map;
 776     const int mi_rows = cpi->common.mi_rows;
 777     const int mi_cols = cpi->common.mi_cols;
 778     memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
 779     if (cpi->active_map.enabled) {
 780       int r, c;
 781       for (r = 0; r < mi_rows; ++r) {
 782         for (c = 0; c < mi_cols; ++c) {
 783           // Cyclic refresh segments are considered active despite not having
 784           // AM_SEGMENT_ID_ACTIVE
 785           new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
 786               seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
 787         }
 788       }
 789     }
 790     return 0;
 791   } else {
 792     return -1;
 793   }
 794 }
 795
 796 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
 797   MACROBLOCK *const mb = &cpi->td.mb;
 798   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
 799   if (cpi->common.allow_high_precision_mv) {
 800     mb->mvcost = mb->nmvcost_hp;
 801     mb->mvsadcost = mb->nmvsadcost_hp;
 802   } else {
 803     mb->mvcost = mb->nmvcost;
 804     mb->mvsadcost = mb->nmvsadcost;
 805   }
 806 }
 807
 808 static void setup_frame(VP9_COMP *cpi) {
 809   VP9_COMMON *const cm = &cpi->common;
 810   // Set up entropy context depending on frame type. The decoder mandates
 811   // the use of the default context, index 0, for keyframes and inter
 812   // frames where the error_resilient_mode or intra_only flag is set. For
 813   // other inter-frames the encoder currently uses only two contexts;
 814   // context 1 for ALTREF frames and context 0 for the others.
 815   if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
 816     vp9_setup_past_independence(cm);
 817   } else {
 818     if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
 819   }
 820
 821   // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF
 822   // case. Need some further investigation on if we could apply this to single
 823   // layer ARF case as well.
 824   if (cpi->multi_layer_arf && !cpi->use_svc) {
 825     GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 826     const int gf_group_index = gf_group->index;
 827     const int boost_frame =
 828         !cpi->rc.is_src_frame_alt_ref &&
 829         (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
 830
 831     // frame_context_idx           Frame Type
 832     //        0              Intra only frame, base layer ARF
 833     //        1              ARFs with layer depth = 2,3
 834     //        2              ARFs with layer depth > 3
 835     //        3              Non-boosted frames
 836     if (frame_is_intra_only(cm)) {
 837       cm->frame_context_idx = 0;
 838     } else if (boost_frame) {
 839       if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
 840         cm->frame_context_idx = 0;
 841       else if (gf_group->layer_depth[gf_group_index] <= 3)
 842         cm->frame_context_idx = 1;
 843       else
 844         cm->frame_context_idx = 2;
 845     } else {
 846       cm->frame_context_idx = 3;
 847     }
 848   }
 849
 850   if (cm->frame_type == KEY_FRAME) {
 851     cpi->refresh_golden_frame = 1;
 852     cpi->refresh_alt_ref_frame = 1;
 853     vp9_zero(cpi->interp_filter_selected);
 854   } else {
 855     *cm->fc = cm->frame_contexts[cm->frame_context_idx];
 856     vp9_zero(cpi->interp_filter_selected[0]);
 857   }
 858 }
 859
 860 static void vp9_enc_setup_mi(VP9_COMMON *cm) {
 861   int i;
 862   cm->mi = cm->mip + cm->mi_stride + 1;
 863   memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
 864   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 865   // Clear top border row
 866   memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
 867   // Clear left border column
 868   for (i = 1; i < cm->mi_rows + 1; ++i)
 869     memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
 870
 871   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 872   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 873
 874   memset(cm->mi_grid_base, 0,
 875          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 876 }
 877
 878 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
 879   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
 880   if (!cm->mip) return 1;
 881   cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
 882   if (!cm->prev_mip) return 1;
 883   cm->mi_alloc_size = mi_size;
 884
 885   cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 886   if (!cm->mi_grid_base) return 1;
 887   cm->prev_mi_grid_base =
 888       (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 889   if (!cm->prev_mi_grid_base) return 1;
 890
 891   return 0;
 892 }
 893
 894 static void vp9_enc_free_mi(VP9_COMMON *cm) {
 895   vpx_free(cm->mip);
 896   cm->mip = NULL;
 897   vpx_free(cm->prev_mip);
 898   cm->prev_mip = NULL;
 899   vpx_free(cm->mi_grid_base);
 900   cm->mi_grid_base = NULL;
 901   vpx_free(cm->prev_mi_grid_base);
 902   cm->prev_mi_grid_base = NULL;
 903   cm->mi_alloc_size = 0;
 904 }
 905
 906 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
 907   // Current mip will be the prev_mip for the next frame.
 908   MODE_INFO **temp_base = cm->prev_mi_grid_base;
 909   MODE_INFO *temp = cm->prev_mip;
 910
 911   // Skip update prev_mi frame in show_existing_frame mode.
 912   if (cm->show_existing_frame) return;
 913
 914   cm->prev_mip = cm->mip;
 915   cm->mip = temp;
 916
 917   // Update the upper left visible macroblock ptrs.
 918   cm->mi = cm->mip + cm->mi_stride + 1;
 919   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 920
 921   cm->prev_mi_grid_base = cm->mi_grid_base;
 922   cm->mi_grid_base = temp_base;
 923   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 924   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 925 }
 926
 927 void vp9_initialize_enc(void) {
 928   static volatile int init_done = 0;
 929
 930   if (!init_done) {
 931     vp9_rtcd();
 932     vpx_dsp_rtcd();
 933     vpx_scale_rtcd();
 934     vp9_init_intra_predictors();
 935     vp9_init_me_luts();
 936     vp9_rc_init_minq_luts();
 937     vp9_entropy_mv_init();
 938 #if !CONFIG_REALTIME_ONLY
 939     vp9_temporal_filter_init();
 940 #endif
 941     init_done = 1;
 942   }
 943 }
 944
 945 static void dealloc_compressor_data(VP9_COMP *cpi) {
 946   VP9_COMMON *const cm = &cpi->common;
 947   int i;
 948
 949   vpx_free(cpi->mbmi_ext_base);
 950   cpi->mbmi_ext_base = NULL;
 951
 952   vpx_free(cpi->tile_data);
 953   cpi->tile_data = NULL;
 954
 955   vpx_free(cpi->segmentation_map);
 956   cpi->segmentation_map = NULL;
 957   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
 958   cpi->coding_context.last_frame_seg_map_copy = NULL;
 959
 960   vpx_free(cpi->nmvcosts[0]);
 961   vpx_free(cpi->nmvcosts[1]);
 962   cpi->nmvcosts[0] = NULL;
 963   cpi->nmvcosts[1] = NULL;
 964
 965   vpx_free(cpi->nmvcosts_hp[0]);
 966   vpx_free(cpi->nmvcosts_hp[1]);
 967   cpi->nmvcosts_hp[0] = NULL;
 968   cpi->nmvcosts_hp[1] = NULL;
 969
 970   vpx_free(cpi->nmvsadcosts[0]);
 971   vpx_free(cpi->nmvsadcosts[1]);
 972   cpi->nmvsadcosts[0] = NULL;
 973   cpi->nmvsadcosts[1] = NULL;
 974
 975   vpx_free(cpi->nmvsadcosts_hp[0]);
 976   vpx_free(cpi->nmvsadcosts_hp[1]);
 977   cpi->nmvsadcosts_hp[0] = NULL;
 978   cpi->nmvsadcosts_hp[1] = NULL;
 979
 980   vpx_free(cpi->skin_map);
 981   cpi->skin_map = NULL;
 982
 983   vpx_free(cpi->prev_partition);
 984   cpi->prev_partition = NULL;
 985
 986   vpx_free(cpi->svc.prev_partition_svc);
 987   cpi->svc.prev_partition_svc = NULL;
 988
 989   vpx_free(cpi->prev_segment_id);
 990   cpi->prev_segment_id = NULL;
 991
 992   vpx_free(cpi->prev_variance_low);
 993   cpi->prev_variance_low = NULL;
 994
 995   vpx_free(cpi->copied_frame_cnt);
 996   cpi->copied_frame_cnt = NULL;
 997
 998   vpx_free(cpi->content_state_sb_fd);
 999   cpi->content_state_sb_fd = NULL;
1000
1001   vpx_free(cpi->count_arf_frame_usage);
1002   cpi->count_arf_frame_usage = NULL;
1003   vpx_free(cpi->count_lastgolden_frame_usage);
1004   cpi->count_lastgolden_frame_usage = NULL;
1005
1006   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1007   cpi->cyclic_refresh = NULL;
1008
1009   vpx_free(cpi->active_map.map);
1010   cpi->active_map.map = NULL;
1011
1012   vpx_free(cpi->roi.roi_map);
1013   cpi->roi.roi_map = NULL;
1014
1015   vpx_free(cpi->consec_zero_mv);
1016   cpi->consec_zero_mv = NULL;
1017
1018   vpx_free(cpi->mb_wiener_variance);
1019   cpi->mb_wiener_variance = NULL;
1020
1021   vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
1022   cpi->mi_ssim_rdmult_scaling_factors = NULL;
1023
1024   vp9_free_ref_frame_buffers(cm->buffer_pool);
1025 #if CONFIG_VP9_POSTPROC
1026   vp9_free_postproc_buffers(cm);
1027 #endif
1028   vp9_free_context_buffers(cm);
1029
1030   vpx_free_frame_buffer(&cpi->last_frame_uf);
1031   vpx_free_frame_buffer(&cpi->scaled_source);
1032   vpx_free_frame_buffer(&cpi->scaled_last_source);
1033   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
1034 #ifdef ENABLE_KF_DENOISE
1035   vpx_free_frame_buffer(&cpi->raw_unscaled_source);
1036   vpx_free_frame_buffer(&cpi->raw_scaled_source);
1037 #endif
1038
1039   vp9_lookahead_destroy(cpi->lookahead);
1040
1041   vpx_free(cpi->tile_tok[0][0]);
1042   cpi->tile_tok[0][0] = 0;
1043
1044   vpx_free(cpi->tplist[0][0]);
1045   cpi->tplist[0][0] = NULL;
1046
1047   vp9_free_pc_tree(&cpi->td);
1048
1049   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
1050     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
1051     vpx_free(lc->rc_twopass_stats_in.buf);
1052     lc->rc_twopass_stats_in.buf = NULL;
1053     lc->rc_twopass_stats_in.sz = 0;
1054   }
1055
1056   if (cpi->source_diff_var != NULL) {
1057     vpx_free(cpi->source_diff_var);
1058     cpi->source_diff_var = NULL;
1059   }
1060
1061   for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
1062     vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
1063   }
1064   memset(&cpi->svc.scaled_frames[0], 0,
1065          MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
1066
1067   vpx_free_frame_buffer(&cpi->svc.scaled_temp);
1068   memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
1069
1070   vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
1071   memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
1072
1073   vp9_free_svc_cyclic_refresh(cpi);
1074 }
1075
1076 static void save_coding_context(VP9_COMP *cpi) {
1077   CODING_CONTEXT *const cc = &cpi->coding_context;
1078   VP9_COMMON *cm = &cpi->common;
1079
1080   // Stores a snapshot of key state variables which can subsequently be
1081   // restored with a call to vp9_restore_coding_context. These functions are
1082   // intended for use in a re-code loop in vp9_compress_frame where the
1083   // quantizer value is adjusted between loop iterations.
1084   vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
1085
1086   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
1087          MV_VALS * sizeof(*cpi->nmvcosts[0]));
1088   memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
1089          MV_VALS * sizeof(*cpi->nmvcosts[1]));
1090   memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
1091          MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
1092   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
1093          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
1094
1095   vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
1096
1097   memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
1098          (cm->mi_rows * cm->mi_cols));
1099
1100   vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
1101   vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
1102
1103   cc->fc = *cm->fc;
1104 }
1105
1106 static void restore_coding_context(VP9_COMP *cpi) {
1107   CODING_CONTEXT *const cc = &cpi->coding_context;
1108   VP9_COMMON *cm = &cpi->common;
1109
1110   // Restore key state variables to the snapshot state stored in the
1111   // previous call to vp9_save_coding_context.
1112   vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
1113
1114   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
1115   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
1116   memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
1117          MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
1118   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
1119          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
1120
1121   vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
1122
1123   memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
1124          (cm->mi_rows * cm->mi_cols));
1125
1126   vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
1127   vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
1128
1129   *cm->fc = cc->fc;
1130 }
1131
1132 #if !CONFIG_REALTIME_ONLY
1133 static void configure_static_seg_features(VP9_COMP *cpi) {
1134   VP9_COMMON *const cm = &cpi->common;
1135   const RATE_CONTROL *const rc = &cpi->rc;
1136   struct segmentation *const seg = &cm->seg;
1137
1138   int high_q = (int)(rc->avg_q > 48.0);
1139   int qi_delta;
1140
1141   // Disable and clear down for KF
1142   if (cm->frame_type == KEY_FRAME) {
1143     // Clear down the global segmentation map
1144     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1145     seg->update_map = 0;
1146     seg->update_data = 0;
1147     cpi->static_mb_pct = 0;
1148
1149     // Disable segmentation
1150     vp9_disable_segmentation(seg);
1151
1152     // Clear down the segment features.
1153     vp9_clearall_segfeatures(seg);
1154   } else if (cpi->refresh_alt_ref_frame) {
1155     // If this is an alt ref frame
1156     // Clear down the global segmentation map
1157     memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1158     seg->update_map = 0;
1159     seg->update_data = 0;
1160     cpi->static_mb_pct = 0;
1161
1162     // Disable segmentation and individual segment features by default
1163     vp9_disable_segmentation(seg);
1164     vp9_clearall_segfeatures(seg);
1165
1166     // Scan frames from current to arf frame.
1167     // This function re-enables segmentation if appropriate.
1168     vp9_update_mbgraph_stats(cpi);
1169
1170     // If segmentation was enabled set those features needed for the
1171     // arf itself.
1172     if (seg->enabled) {
1173       seg->update_map = 1;
1174       seg->update_data = 1;
1175
1176       qi_delta =
1177           vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
1178       vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
1179       vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1180
1181       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1182       vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1183
1184       // Where relevant assume segment data is delta data
1185       seg->abs_delta = SEGMENT_DELTADATA;
1186     }
1187   } else if (seg->enabled) {
1188     // All other frames if segmentation has been enabled
1189
1190     // First normal frame in a valid gf or alt ref group
1191     if (rc->frames_since_golden == 0) {
1192       // Set up segment features for normal frames in an arf group
1193       if (rc->source_alt_ref_active) {
1194         seg->update_map = 0;
1195         seg->update_data = 1;
1196         seg->abs_delta = SEGMENT_DELTADATA;
1197
1198         qi_delta =
1199             vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth);
1200         vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
1201         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
1202
1203         vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
1204         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
1205
1206         // Segment coding disabled for compred testing
1207         if (high_q || (cpi->static_mb_pct == 100)) {
1208           vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1209           vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1210           vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1211         }
1212       } else {
1213         // Disable segmentation and clear down features if alt ref
1214         // is not active for this group
1215
1216         vp9_disable_segmentation(seg);
1217
1218         memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
1219
1220         seg->update_map = 0;
1221         seg->update_data = 0;
1222
1223         vp9_clearall_segfeatures(seg);
1224       }
1225     } else if (rc->is_src_frame_alt_ref) {
1226       // Special case where we are coding over the top of a previous
1227       // alt ref frame.
1228       // Segment coding disabled for compred testing
1229
1230       // Enable ref frame features for segment 0 as well
1231       vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
1232       vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
1233
1234       // All mbs should use ALTREF_FRAME
1235       vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
1236       vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1237       vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
1238       vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
1239
1240       // Skip all MBs if high Q (0,0 mv and skip coeffs)
1241       if (high_q) {
1242         vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
1243         vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
1244       }
1245       // Enable data update
1246       seg->update_data = 1;
1247     } else {
1248       // All other frames.
1249
1250       // No updates.. leave things as they are.
1251       seg->update_map = 0;
1252       seg->update_data = 0;
1253     }
1254   }
1255 }
1256 #endif  // !CONFIG_REALTIME_ONLY
1257
1258 static void update_reference_segmentation_map(VP9_COMP *cpi) {
1259   VP9_COMMON *const cm = &cpi->common;
1260   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
1261   uint8_t *cache_ptr = cm->last_frame_seg_map;
1262   int row, col;
1263
1264   for (row = 0; row < cm->mi_rows; row++) {
1265     MODE_INFO **mi_8x8 = mi_8x8_ptr;
1266     uint8_t *cache = cache_ptr;
1267     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
1268       cache[0] = mi_8x8[0]->segment_id;
1269     mi_8x8_ptr += cm->mi_stride;
1270     cache_ptr += cm->mi_cols;
1271   }
1272 }
1273
1274 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
1275   VP9_COMMON *cm = &cpi->common;
1276   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1277
1278   if (!cpi->lookahead)
1279     cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
1280                                         cm->subsampling_x, cm->subsampling_y,
1281 #if CONFIG_VP9_HIGHBITDEPTH
1282                                         cm->use_highbitdepth,
1283 #endif
1284                                         oxcf->lag_in_frames);
1285   if (!cpi->lookahead)
1286     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1287                        "Failed to allocate lag buffers");
1288
1289   // TODO(agrange) Check if ARF is enabled and skip allocation if not.
1290   if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
1291                                cm->subsampling_x, cm->subsampling_y,
1292 #if CONFIG_VP9_HIGHBITDEPTH
1293                                cm->use_highbitdepth,
1294 #endif
1295                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1296                                NULL, NULL, NULL))
1297     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1298                        "Failed to allocate altref buffer");
1299 }
1300
1301 static void alloc_util_frame_buffers(VP9_COMP *cpi) {
1302   VP9_COMMON *const cm = &cpi->common;
1303   if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
1304                                cm->subsampling_x, cm->subsampling_y,
1305 #if CONFIG_VP9_HIGHBITDEPTH
1306                                cm->use_highbitdepth,
1307 #endif
1308                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1309                                NULL, NULL, NULL))
1310     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1311                        "Failed to allocate last frame buffer");
1312
1313   if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
1314                                cm->subsampling_x, cm->subsampling_y,
1315 #if CONFIG_VP9_HIGHBITDEPTH
1316                                cm->use_highbitdepth,
1317 #endif
1318                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1319                                NULL, NULL, NULL))
1320     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1321                        "Failed to allocate scaled source buffer");
1322
1323   // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
1324   // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
1325   // target of 1/4x1/4. number_spatial_layers must be greater than 2.
1326   if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
1327       cpi->svc.number_spatial_layers > 2) {
1328     cpi->svc.scaled_temp_is_alloc = 1;
1329     if (vpx_realloc_frame_buffer(
1330             &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
1331             cm->subsampling_x, cm->subsampling_y,
1332 #if CONFIG_VP9_HIGHBITDEPTH
1333             cm->use_highbitdepth,
1334 #endif
1335             VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
1336       vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
1337                          "Failed to allocate scaled_frame for svc ");
1338   }
1339
1340   if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
1341                                cm->subsampling_x, cm->subsampling_y,
1342 #if CONFIG_VP9_HIGHBITDEPTH
1343                                cm->use_highbitdepth,
1344 #endif
1345                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1346                                NULL, NULL, NULL))
1347     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1348                        "Failed to allocate scaled last source buffer");
1349 #ifdef ENABLE_KF_DENOISE
1350   if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height,
1351                                cm->subsampling_x, cm->subsampling_y,
1352 #if CONFIG_VP9_HIGHBITDEPTH
1353                                cm->use_highbitdepth,
1354 #endif
1355                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1356                                NULL, NULL, NULL))
1357     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1358                        "Failed to allocate unscaled raw source frame buffer");
1359
1360   if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height,
1361                                cm->subsampling_x, cm->subsampling_y,
1362 #if CONFIG_VP9_HIGHBITDEPTH
1363                                cm->use_highbitdepth,
1364 #endif
1365                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1366                                NULL, NULL, NULL))
1367     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1368                        "Failed to allocate scaled raw source frame buffer");
1369 #endif
1370 }
1371
1372 static int alloc_context_buffers_ext(VP9_COMP *cpi) {
1373   VP9_COMMON *cm = &cpi->common;
1374   int mi_size = cm->mi_cols * cm->mi_rows;
1375
1376   cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
1377   if (!cpi->mbmi_ext_base) return 1;
1378
1379   return 0;
1380 }
1381
1382 static void alloc_compressor_data(VP9_COMP *cpi) {
1383   VP9_COMMON *cm = &cpi->common;
1384   int sb_rows;
1385
1386   vp9_alloc_context_buffers(cm, cm->width, cm->height);
1387
1388   alloc_context_buffers_ext(cpi);
1389
1390   vpx_free(cpi->tile_tok[0][0]);
1391
1392   {
1393     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
1394     CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
1395                     vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
1396   }
1397
1398   sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
1399   vpx_free(cpi->tplist[0][0]);
1400   CHECK_MEM_ERROR(
1401       cm, cpi->tplist[0][0],
1402       vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
1403
1404   vp9_setup_pc_tree(&cpi->common, &cpi->td);
1405 }
1406
1407 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
1408   cpi->framerate = framerate < 0.1 ? 30 : framerate;
1409   vp9_rc_update_framerate(cpi);
1410 }
1411
1412 static void set_tile_limits(VP9_COMP *cpi) {
1413   VP9_COMMON *const cm = &cpi->common;
1414
1415   int min_log2_tile_cols, max_log2_tile_cols;
1416   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
1417
1418   cm->log2_tile_cols =
1419       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
1420   cm->log2_tile_rows = cpi->oxcf.tile_rows;
1421
1422   if (cpi->oxcf.target_level == LEVEL_AUTO) {
1423     const int level_tile_cols =
1424         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
1425     if (cm->log2_tile_cols > level_tile_cols) {
1426       cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
1427     }
1428   }
1429 }
1430
1431 static void update_frame_size(VP9_COMP *cpi) {
1432   VP9_COMMON *const cm = &cpi->common;
1433   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
1434
1435   vp9_set_mb_mi(cm, cm->width, cm->height);
1436   vp9_init_context_buffers(cm);
1437   vp9_init_macroblockd(cm, xd, NULL);
1438   cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
1439   memset(cpi->mbmi_ext_base, 0,
1440          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
1441
1442   set_tile_limits(cpi);
1443 }
1444
1445 static void init_buffer_indices(VP9_COMP *cpi) {
1446   int ref_frame;
1447
1448   for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1449     cpi->ref_fb_idx[ref_frame] = ref_frame;
1450
1451   cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
1452   cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
1453   cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
1454 }
1455
1456 static void init_level_constraint(LevelConstraint *lc) {
1457   lc->level_index = -1;
1458   lc->max_cpb_size = INT_MAX;
1459   lc->max_frame_size = INT_MAX;
1460   lc->fail_flag = 0;
1461 }
1462
1463 static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
1464   vpx_clear_system_state();
1465   ls->level_index = level_index;
1466   if (level_index >= 0) {
1467     ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000;
1468   }
1469 }
1470
1471 static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
1472   VP9_COMMON *const cm = &cpi->common;
1473
1474   cpi->oxcf = *oxcf;
1475   cpi->framerate = oxcf->init_framerate;
1476   cm->profile = oxcf->profile;
1477   cm->bit_depth = oxcf->bit_depth;
1478 #if CONFIG_VP9_HIGHBITDEPTH
1479   cm->use_highbitdepth = oxcf->use_highbitdepth;
1480 #endif
1481   cm->color_space = oxcf->color_space;
1482   cm->color_range = oxcf->color_range;
1483
1484   cpi->target_level = oxcf->target_level;
1485   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1486   set_level_constraint(&cpi->level_constraint,
1487                        get_level_index(cpi->target_level));
1488
1489   cm->width = oxcf->width;
1490   cm->height = oxcf->height;
1491   alloc_compressor_data(cpi);
1492
1493   cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
1494
1495   // Single thread case: use counts in common.
1496   cpi->td.counts = &cm->counts;
1497
1498   // Spatial scalability.
1499   cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
1500   // Temporal scalability.
1501   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
1502
1503   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
1504       ((cpi->svc.number_temporal_layers > 1 ||
1505         cpi->svc.number_spatial_layers > 1) &&
1506        cpi->oxcf.pass != 1)) {
1507     vp9_init_layer_context(cpi);
1508   }
1509
1510   // change includes all joint functionality
1511   vp9_change_config(cpi, oxcf);
1512
1513   cpi->static_mb_pct = 0;
1514   cpi->ref_frame_flags = 0;
1515
1516   init_buffer_indices(cpi);
1517
1518   vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
1519 }
1520
1521 static void set_rc_buffer_sizes(RATE_CONTROL *rc,
1522                                 const VP9EncoderConfig *oxcf) {
1523   const int64_t bandwidth = oxcf->target_bandwidth;
1524   const int64_t starting = oxcf->starting_buffer_level_ms;
1525   const int64_t optimal = oxcf->optimal_buffer_level_ms;
1526   const int64_t maximum = oxcf->maximum_buffer_size_ms;
1527
1528   rc->starting_buffer_level = starting * bandwidth / 1000;
1529   rc->optimal_buffer_level =
1530       (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
1531   rc->maximum_buffer_size =
1532       (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
1533 }
1534
1535 #if CONFIG_VP9_HIGHBITDEPTH
1536 // TODO(angiebird): make sdx8f available for highbitdepth if needed
1537 #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
1538   cpi->fn_ptr[BT].sdf = SDF;                             \
1539   cpi->fn_ptr[BT].sdaf = SDAF;                           \
1540   cpi->fn_ptr[BT].vf = VF;                               \
1541   cpi->fn_ptr[BT].svf = SVF;                             \
1542   cpi->fn_ptr[BT].svaf = SVAF;                           \
1543   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
1544   cpi->fn_ptr[BT].sdx8f = NULL;
1545
1546 #define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
1547   static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
1548                                      int source_stride,                        \
1549                                      const uint8_t *ref_ptr, int ref_stride) { \
1550     return fnname(src_ptr, source_stride, ref_ptr, ref_stride);                \
1551   }                                                                            \
1552   static unsigned int fnname##_bits10(                                         \
1553       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1554       int ref_stride) {                                                        \
1555     return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;           \
1556   }                                                                            \
1557   static unsigned int fnname##_bits12(                                         \
1558       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1559       int ref_stride) {                                                        \
1560     return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;           \
1561   }
1562
1563 #define MAKE_BFP_SADAVG_WRAPPER(fnname)                                        \
1564   static unsigned int fnname##_bits8(                                          \
1565       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1566       int ref_stride, const uint8_t *second_pred) {                            \
1567     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred);   \
1568   }                                                                            \
1569   static unsigned int fnname##_bits10(                                         \
1570       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1571       int ref_stride, const uint8_t *second_pred) {                            \
1572     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
1573            2;                                                                  \
1574   }                                                                            \
1575   static unsigned int fnname##_bits12(                                         \
1576       const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
1577       int ref_stride, const uint8_t *second_pred) {                            \
1578     return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
1579            4;                                                                  \
1580   }
1581
1582 #define MAKE_BFP_SAD4D_WRAPPER(fnname)                                        \
1583   static void fnname##_bits8(const uint8_t *src_ptr, int source_stride,       \
1584                              const uint8_t *const ref_ptr[], int ref_stride,  \
1585                              unsigned int *sad_array) {                       \
1586     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1587   }                                                                           \
1588   static void fnname##_bits10(const uint8_t *src_ptr, int source_stride,      \
1589                               const uint8_t *const ref_ptr[], int ref_stride, \
1590                               unsigned int *sad_array) {                      \
1591     int i;                                                                    \
1592     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1593     for (i = 0; i < 4; i++) sad_array[i] >>= 2;                               \
1594   }                                                                           \
1595   static void fnname##_bits12(const uint8_t *src_ptr, int source_stride,      \
1596                               const uint8_t *const ref_ptr[], int ref_stride, \
1597                               unsigned int *sad_array) {                      \
1598     int i;                                                                    \
1599     fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
1600     for (i = 0; i < 4; i++) sad_array[i] >>= 4;                               \
1601   }
1602
1603 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
1604 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
1605 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
1606 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
1607 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
1608 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
1609 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
1610 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
1611 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
1612 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
1613 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
1614 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
1615 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
1616 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
1617 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
1618 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
1619 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
1620 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
1621 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
1622 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
1623 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
1624 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
1625 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
1626 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
1627 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
1628 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
1629 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
1630 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
1631 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
1632 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
1633 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
1634 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
1635 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
1636 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
1637 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
1638 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
1639 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
1640 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
1641 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
1642
1643 static void highbd_set_var_fns(VP9_COMP *const cpi) {
1644   VP9_COMMON *const cm = &cpi->common;
1645   if (cm->use_highbitdepth) {
1646     switch (cm->bit_depth) {
1647       case VPX_BITS_8:
1648         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
1649                    vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
1650                    vpx_highbd_8_sub_pixel_variance32x16,
1651                    vpx_highbd_8_sub_pixel_avg_variance32x16,
1652                    vpx_highbd_sad32x16x4d_bits8)
1653
1654         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
1655                    vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
1656                    vpx_highbd_8_sub_pixel_variance16x32,
1657                    vpx_highbd_8_sub_pixel_avg_variance16x32,
1658                    vpx_highbd_sad16x32x4d_bits8)
1659
1660         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
1661                    vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
1662                    vpx_highbd_8_sub_pixel_variance64x32,
1663                    vpx_highbd_8_sub_pixel_avg_variance64x32,
1664                    vpx_highbd_sad64x32x4d_bits8)
1665
1666         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
1667                    vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
1668                    vpx_highbd_8_sub_pixel_variance32x64,
1669                    vpx_highbd_8_sub_pixel_avg_variance32x64,
1670                    vpx_highbd_sad32x64x4d_bits8)
1671
1672         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
1673                    vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
1674                    vpx_highbd_8_sub_pixel_variance32x32,
1675                    vpx_highbd_8_sub_pixel_avg_variance32x32,
1676                    vpx_highbd_sad32x32x4d_bits8)
1677
1678         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
1679                    vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
1680                    vpx_highbd_8_sub_pixel_variance64x64,
1681                    vpx_highbd_8_sub_pixel_avg_variance64x64,
1682                    vpx_highbd_sad64x64x4d_bits8)
1683
1684         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
1685                    vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
1686                    vpx_highbd_8_sub_pixel_variance16x16,
1687                    vpx_highbd_8_sub_pixel_avg_variance16x16,
1688                    vpx_highbd_sad16x16x4d_bits8)
1689
1690         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
1691                    vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
1692                    vpx_highbd_8_sub_pixel_variance16x8,
1693                    vpx_highbd_8_sub_pixel_avg_variance16x8,
1694                    vpx_highbd_sad16x8x4d_bits8)
1695
1696         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
1697                    vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
1698                    vpx_highbd_8_sub_pixel_variance8x16,
1699                    vpx_highbd_8_sub_pixel_avg_variance8x16,
1700                    vpx_highbd_sad8x16x4d_bits8)
1701
1702         HIGHBD_BFP(
1703             BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
1704             vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
1705             vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
1706
1707         HIGHBD_BFP(
1708             BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
1709             vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
1710             vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
1711
1712         HIGHBD_BFP(
1713             BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
1714             vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
1715             vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
1716
1717         HIGHBD_BFP(
1718             BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
1719             vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
1720             vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
1721         break;
1722
1723       case VPX_BITS_10:
1724         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
1725                    vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
1726                    vpx_highbd_10_sub_pixel_variance32x16,
1727                    vpx_highbd_10_sub_pixel_avg_variance32x16,
1728                    vpx_highbd_sad32x16x4d_bits10)
1729
1730         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
1731                    vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
1732                    vpx_highbd_10_sub_pixel_variance16x32,
1733                    vpx_highbd_10_sub_pixel_avg_variance16x32,
1734                    vpx_highbd_sad16x32x4d_bits10)
1735
1736         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
1737                    vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
1738                    vpx_highbd_10_sub_pixel_variance64x32,
1739                    vpx_highbd_10_sub_pixel_avg_variance64x32,
1740                    vpx_highbd_sad64x32x4d_bits10)
1741
1742         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
1743                    vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
1744                    vpx_highbd_10_sub_pixel_variance32x64,
1745                    vpx_highbd_10_sub_pixel_avg_variance32x64,
1746                    vpx_highbd_sad32x64x4d_bits10)
1747
1748         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
1749                    vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
1750                    vpx_highbd_10_sub_pixel_variance32x32,
1751                    vpx_highbd_10_sub_pixel_avg_variance32x32,
1752                    vpx_highbd_sad32x32x4d_bits10)
1753
1754         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
1755                    vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
1756                    vpx_highbd_10_sub_pixel_variance64x64,
1757                    vpx_highbd_10_sub_pixel_avg_variance64x64,
1758                    vpx_highbd_sad64x64x4d_bits10)
1759
1760         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
1761                    vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
1762                    vpx_highbd_10_sub_pixel_variance16x16,
1763                    vpx_highbd_10_sub_pixel_avg_variance16x16,
1764                    vpx_highbd_sad16x16x4d_bits10)
1765
1766         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
1767                    vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
1768                    vpx_highbd_10_sub_pixel_variance16x8,
1769                    vpx_highbd_10_sub_pixel_avg_variance16x8,
1770                    vpx_highbd_sad16x8x4d_bits10)
1771
1772         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
1773                    vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
1774                    vpx_highbd_10_sub_pixel_variance8x16,
1775                    vpx_highbd_10_sub_pixel_avg_variance8x16,
1776                    vpx_highbd_sad8x16x4d_bits10)
1777
1778         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
1779                    vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
1780                    vpx_highbd_10_sub_pixel_variance8x8,
1781                    vpx_highbd_10_sub_pixel_avg_variance8x8,
1782                    vpx_highbd_sad8x8x4d_bits10)
1783
1784         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
1785                    vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
1786                    vpx_highbd_10_sub_pixel_variance8x4,
1787                    vpx_highbd_10_sub_pixel_avg_variance8x4,
1788                    vpx_highbd_sad8x4x4d_bits10)
1789
1790         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
1791                    vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
1792                    vpx_highbd_10_sub_pixel_variance4x8,
1793                    vpx_highbd_10_sub_pixel_avg_variance4x8,
1794                    vpx_highbd_sad4x8x4d_bits10)
1795
1796         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
1797                    vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
1798                    vpx_highbd_10_sub_pixel_variance4x4,
1799                    vpx_highbd_10_sub_pixel_avg_variance4x4,
1800                    vpx_highbd_sad4x4x4d_bits10)
1801         break;
1802
1803       default:
1804         assert(cm->bit_depth == VPX_BITS_12);
1805         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
1806                    vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
1807                    vpx_highbd_12_sub_pixel_variance32x16,
1808                    vpx_highbd_12_sub_pixel_avg_variance32x16,
1809                    vpx_highbd_sad32x16x4d_bits12)
1810
1811         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
1812                    vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
1813                    vpx_highbd_12_sub_pixel_variance16x32,
1814                    vpx_highbd_12_sub_pixel_avg_variance16x32,
1815                    vpx_highbd_sad16x32x4d_bits12)
1816
1817         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
1818                    vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
1819                    vpx_highbd_12_sub_pixel_variance64x32,
1820                    vpx_highbd_12_sub_pixel_avg_variance64x32,
1821                    vpx_highbd_sad64x32x4d_bits12)
1822
1823         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
1824                    vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
1825                    vpx_highbd_12_sub_pixel_variance32x64,
1826                    vpx_highbd_12_sub_pixel_avg_variance32x64,
1827                    vpx_highbd_sad32x64x4d_bits12)
1828
1829         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
1830                    vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
1831                    vpx_highbd_12_sub_pixel_variance32x32,
1832                    vpx_highbd_12_sub_pixel_avg_variance32x32,
1833                    vpx_highbd_sad32x32x4d_bits12)
1834
1835         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
1836                    vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
1837                    vpx_highbd_12_sub_pixel_variance64x64,
1838                    vpx_highbd_12_sub_pixel_avg_variance64x64,
1839                    vpx_highbd_sad64x64x4d_bits12)
1840
1841         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
1842                    vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
1843                    vpx_highbd_12_sub_pixel_variance16x16,
1844                    vpx_highbd_12_sub_pixel_avg_variance16x16,
1845                    vpx_highbd_sad16x16x4d_bits12)
1846
1847         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
1848                    vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
1849                    vpx_highbd_12_sub_pixel_variance16x8,
1850                    vpx_highbd_12_sub_pixel_avg_variance16x8,
1851                    vpx_highbd_sad16x8x4d_bits12)
1852
1853         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
1854                    vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
1855                    vpx_highbd_12_sub_pixel_variance8x16,
1856                    vpx_highbd_12_sub_pixel_avg_variance8x16,
1857                    vpx_highbd_sad8x16x4d_bits12)
1858
1859         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
1860                    vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
1861                    vpx_highbd_12_sub_pixel_variance8x8,
1862                    vpx_highbd_12_sub_pixel_avg_variance8x8,
1863                    vpx_highbd_sad8x8x4d_bits12)
1864
1865         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
1866                    vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
1867                    vpx_highbd_12_sub_pixel_variance8x4,
1868                    vpx_highbd_12_sub_pixel_avg_variance8x4,
1869                    vpx_highbd_sad8x4x4d_bits12)
1870
1871         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
1872                    vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
1873                    vpx_highbd_12_sub_pixel_variance4x8,
1874                    vpx_highbd_12_sub_pixel_avg_variance4x8,
1875                    vpx_highbd_sad4x8x4d_bits12)
1876
1877         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
1878                    vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
1879                    vpx_highbd_12_sub_pixel_variance4x4,
1880                    vpx_highbd_12_sub_pixel_avg_variance4x4,
1881                    vpx_highbd_sad4x4x4d_bits12)
1882         break;
1883     }
1884   }
1885 }
1886 #endif  // CONFIG_VP9_HIGHBITDEPTH
1887
1888 static void realloc_segmentation_maps(VP9_COMP *cpi) {
1889   VP9_COMMON *const cm = &cpi->common;
1890
1891   // Create the encoder segmentation map and set all entries to 0
1892   vpx_free(cpi->segmentation_map);
1893   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
1894                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1895
1896   // Create a map used for cyclic background refresh.
1897   if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1898   CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
1899                   vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
1900
1901   // Create a map used to mark inactive areas.
1902   vpx_free(cpi->active_map.map);
1903   CHECK_MEM_ERROR(cm, cpi->active_map.map,
1904                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1905
1906   // And a place holder structure is the coding context
1907   // for use if we want to save and restore it
1908   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
1909   CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
1910                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1911 }
1912
1913 static void alloc_copy_partition_data(VP9_COMP *cpi) {
1914   VP9_COMMON *const cm = &cpi->common;
1915   if (cpi->prev_partition == NULL) {
1916     CHECK_MEM_ERROR(cm, cpi->prev_partition,
1917                     (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
1918                                              sizeof(*cpi->prev_partition)));
1919   }
1920   if (cpi->prev_segment_id == NULL) {
1921     CHECK_MEM_ERROR(
1922         cm, cpi->prev_segment_id,
1923         (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1924                              sizeof(*cpi->prev_segment_id)));
1925   }
1926   if (cpi->prev_variance_low == NULL) {
1927     CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
1928                     (uint8_t *)vpx_calloc(
1929                         (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
1930                         sizeof(*cpi->prev_variance_low)));
1931   }
1932   if (cpi->copied_frame_cnt == NULL) {
1933     CHECK_MEM_ERROR(
1934         cm, cpi->copied_frame_cnt,
1935         (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1936                               sizeof(*cpi->copied_frame_cnt)));
1937   }
1938 }
1939
1940 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1941   VP9_COMMON *const cm = &cpi->common;
1942   RATE_CONTROL *const rc = &cpi->rc;
1943   int last_w = cpi->oxcf.width;
1944   int last_h = cpi->oxcf.height;
1945
1946   vp9_init_quantizer(cpi);
1947   if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
1948   cm->bit_depth = oxcf->bit_depth;
1949   cm->color_space = oxcf->color_space;
1950   cm->color_range = oxcf->color_range;
1951
1952   cpi->target_level = oxcf->target_level;
1953   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1954   set_level_constraint(&cpi->level_constraint,
1955                        get_level_index(cpi->target_level));
1956
1957   if (cm->profile <= PROFILE_1)
1958     assert(cm->bit_depth == VPX_BITS_8);
1959   else
1960     assert(cm->bit_depth > VPX_BITS_8);
1961
1962   cpi->oxcf = *oxcf;
1963 #if CONFIG_VP9_HIGHBITDEPTH
1964   cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
1965 #endif  // CONFIG_VP9_HIGHBITDEPTH
1966
1967   if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
1968     rc->baseline_gf_interval = FIXED_GF_INTERVAL;
1969   } else {
1970     rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
1971   }
1972
1973   cpi->refresh_golden_frame = 0;
1974   cpi->refresh_last_frame = 1;
1975   cm->refresh_frame_context = 1;
1976   cm->reset_frame_context = 0;
1977
1978   vp9_reset_segment_features(&cm->seg);
1979   vp9_set_high_precision_mv(cpi, 0);
1980
1981   {
1982     int i;
1983
1984     for (i = 0; i < MAX_SEGMENTS; i++)
1985       cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
1986   }
1987   cpi->encode_breakout = cpi->oxcf.encode_breakout;
1988
1989   set_rc_buffer_sizes(rc, &cpi->oxcf);
1990
1991   // Under a configuration change, where maximum_buffer_size may change,
1992   // keep buffer level clipped to the maximum allowed buffer size.
1993   rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
1994   rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
1995
1996   // Set up frame rate and related parameters rate control values.
1997   vp9_new_framerate(cpi, cpi->framerate);
1998
1999   // Set absolute upper and lower quality limits
2000   rc->worst_quality = cpi->oxcf.worst_allowed_q;
2001   rc->best_quality = cpi->oxcf.best_allowed_q;
2002
2003   cm->interp_filter = cpi->sf.default_interp_filter;
2004
2005   if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
2006     cm->render_width = cpi->oxcf.render_width;
2007     cm->render_height = cpi->oxcf.render_height;
2008   } else {
2009     cm->render_width = cpi->oxcf.width;
2010     cm->render_height = cpi->oxcf.height;
2011   }
2012   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2013     cm->width = cpi->oxcf.width;
2014     cm->height = cpi->oxcf.height;
2015     cpi->external_resize = 1;
2016   }
2017
2018   if (cpi->initial_width) {
2019     int new_mi_size = 0;
2020     vp9_set_mb_mi(cm, cm->width, cm->height);
2021     new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
2022     if (cm->mi_alloc_size < new_mi_size) {
2023       vp9_free_context_buffers(cm);
2024       alloc_compressor_data(cpi);
2025       realloc_segmentation_maps(cpi);
2026       cpi->initial_width = cpi->initial_height = 0;
2027       cpi->external_resize = 0;
2028     } else if (cm->mi_alloc_size == new_mi_size &&
2029                (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
2030       vp9_alloc_loop_filter(cm);
2031     }
2032   }
2033
2034   if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width ||
2035       last_h != cpi->oxcf.height)
2036     update_frame_size(cpi);
2037
2038   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2039     memset(cpi->consec_zero_mv, 0,
2040            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
2041     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
2042       vp9_cyclic_refresh_reset_resize(cpi);
2043     rc->rc_1_frame = 0;
2044     rc->rc_2_frame = 0;
2045   }
2046
2047   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
2048       ((cpi->svc.number_temporal_layers > 1 ||
2049         cpi->svc.number_spatial_layers > 1) &&
2050        cpi->oxcf.pass != 1)) {
2051     vp9_update_layer_context_change_config(cpi,
2052                                            (int)cpi->oxcf.target_bandwidth);
2053   }
2054
2055   // Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
2056   // configuration change has a large change in avg_frame_bandwidth.
2057   // For SVC check for resetting based on spatial layer average bandwidth.
2058   // Also reset buffer level to optimal level.
2059   if (cm->current_video_frame > 0) {
2060     if (cpi->use_svc) {
2061       vp9_svc_check_reset_layer_rc_flag(cpi);
2062     } else {
2063       if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
2064           rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
2065         rc->rc_1_frame = 0;
2066         rc->rc_2_frame = 0;
2067         rc->bits_off_target = rc->optimal_buffer_level;
2068         rc->buffer_level = rc->optimal_buffer_level;
2069       }
2070     }
2071   }
2072
2073   cpi->alt_ref_source = NULL;
2074   rc->is_src_frame_alt_ref = 0;
2075
2076 #if 0
2077   // Experimental RD Code
2078   cpi->frame_distortion = 0;
2079   cpi->last_frame_distortion = 0;
2080 #endif
2081
2082   set_tile_limits(cpi);
2083
2084   cpi->ext_refresh_frame_flags_pending = 0;
2085   cpi->ext_refresh_frame_context_pending = 0;
2086
2087 #if CONFIG_VP9_HIGHBITDEPTH
2088   highbd_set_var_fns(cpi);
2089 #endif
2090
2091   vp9_set_row_mt(cpi);
2092 }
2093
2094 #ifndef M_LOG2_E
2095 #define M_LOG2_E 0.693147180559945309417
2096 #endif
2097 #define log2f(x) (log(x) / (float)M_LOG2_E)
2098
2099 /***********************************************************************
2100  * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts'    *
2101  ***********************************************************************
2102  * The following 2 functions ('cal_nmvjointsadcost' and                *
2103  * 'cal_nmvsadcosts') are used to calculate cost lookup tables         *
2104  * used by 'vp9_diamond_search_sad'. The C implementation of the       *
2105  * function is generic, but the AVX intrinsics optimised version       *
2106  * relies on the following properties of the computed tables:          *
2107  * For cal_nmvjointsadcost:                                            *
2108  *   - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]     *
2109  * For cal_nmvsadcosts:                                                *
2110  *   - For all i: mvsadcost[0][i] == mvsadcost[1][i]                   *
2111  *         (Equal costs for both components)                           *
2112  *   - For all i: mvsadcost[0][i] == mvsadcost[0][-i]                  *
2113  *         (Cost function is even)                                     *
2114  * If these do not hold, then the AVX optimised version of the         *
2115  * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
2116  * case you can revert to using the C function instead.                *
2117  ***********************************************************************/
2118
2119 static void cal_nmvjointsadcost(int *mvjointsadcost) {
2120   /*********************************************************************
2121    * Warning: Read the comments above before modifying this function   *
2122    *********************************************************************/
2123   mvjointsadcost[0] = 600;
2124   mvjointsadcost[1] = 300;
2125   mvjointsadcost[2] = 300;
2126   mvjointsadcost[3] = 300;
2127 }
2128
2129 static void cal_nmvsadcosts(int *mvsadcost[2]) {
2130   /*********************************************************************
2131    * Warning: Read the comments above before modifying this function   *
2132    *********************************************************************/
2133   int i = 1;
2134
2135   mvsadcost[0][0] = 0;
2136   mvsadcost[1][0] = 0;
2137
2138   do {
2139     double z = 256 * (2 * (log2f(8 * i) + .6));
2140     mvsadcost[0][i] = (int)z;
2141     mvsadcost[1][i] = (int)z;
2142     mvsadcost[0][-i] = (int)z;
2143     mvsadcost[1][-i] = (int)z;
2144   } while (++i <= MV_MAX);
2145 }
2146
2147 static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
2148   int i = 1;
2149
2150   mvsadcost[0][0] = 0;
2151   mvsadcost[1][0] = 0;
2152
2153   do {
2154     double z = 256 * (2 * (log2f(8 * i) + .6));
2155     mvsadcost[0][i] = (int)z;
2156     mvsadcost[1][i] = (int)z;
2157     mvsadcost[0][-i] = (int)z;
2158     mvsadcost[1][-i] = (int)z;
2159   } while (++i <= MV_MAX);
2160 }
2161
2162 static void init_ref_frame_bufs(VP9_COMMON *cm) {
2163   int i;
2164   BufferPool *const pool = cm->buffer_pool;
2165   cm->new_fb_idx = INVALID_IDX;
2166   for (i = 0; i < REF_FRAMES; ++i) {
2167     cm->ref_frame_map[i] = INVALID_IDX;
2168   }
2169   for (i = 0; i < FRAME_BUFFERS; ++i) {
2170     pool->frame_bufs[i].ref_count = 0;
2171   }
2172 }
2173
2174 VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
2175                                 BufferPool *const pool) {
2176   unsigned int i;
2177   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
2178   VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
2179
2180   if (!cm) return NULL;
2181
2182   vp9_zero(*cpi);
2183
2184   if (setjmp(cm->error.jmp)) {
2185     cm->error.setjmp = 0;
2186     vp9_remove_compressor(cpi);
2187     return 0;
2188   }
2189
2190   cm->error.setjmp = 1;
2191   cm->alloc_mi = vp9_enc_alloc_mi;
2192   cm->free_mi = vp9_enc_free_mi;
2193   cm->setup_mi = vp9_enc_setup_mi;
2194
2195   CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
2196   CHECK_MEM_ERROR(
2197       cm, cm->frame_contexts,
2198       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
2199
2200   cpi->use_svc = 0;
2201   cpi->resize_state = ORIG;
2202   cpi->external_resize = 0;
2203   cpi->resize_avg_qp = 0;
2204   cpi->resize_buffer_underflow = 0;
2205   cpi->use_skin_detection = 0;
2206   cpi->common.buffer_pool = pool;
2207   init_ref_frame_bufs(cm);
2208
2209   cpi->force_update_segmentation = 0;
2210
2211   init_config(cpi, oxcf);
2212   cpi->frame_info = vp9_get_frame_info(oxcf);
2213
2214   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
2215
2216   cm->current_video_frame = 0;
2217   cpi->partition_search_skippable_frame = 0;
2218   cpi->tile_data = NULL;
2219
2220   realloc_segmentation_maps(cpi);
2221
2222   CHECK_MEM_ERROR(
2223       cm, cpi->skin_map,
2224       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
2225
2226 #if !CONFIG_REALTIME_ONLY
2227   CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
2228 #endif
2229
2230   CHECK_MEM_ERROR(
2231       cm, cpi->consec_zero_mv,
2232       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
2233
2234   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
2235                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
2236   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
2237                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
2238   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
2239                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
2240   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
2241                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
2242   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
2243                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
2244   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
2245                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
2246   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
2247                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
2248   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
2249                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
2250
2251   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
2252        i++) {
2253     CHECK_MEM_ERROR(
2254         cm, cpi->mbgraph_stats[i].mb_stats,
2255         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
2256   }
2257
2258 #if CONFIG_FP_MB_STATS
2259   cpi->use_fp_mb_stats = 0;
2260   if (cpi->use_fp_mb_stats) {
2261     // a place holder used to store the first pass mb stats in the first pass
2262     CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
2263                     vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
2264   } else {
2265     cpi->twopass.frame_mb_stats_buf = NULL;
2266   }
2267 #endif
2268
2269   cpi->refresh_alt_ref_frame = 0;
2270   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
2271
2272   init_level_info(&cpi->level_info);
2273   init_level_constraint(&cpi->level_constraint);
2274
2275 #if CONFIG_INTERNAL_STATS
2276   cpi->b_calculate_blockiness = 1;
2277   cpi->b_calculate_consistency = 1;
2278   cpi->total_inconsistency = 0;
2279   cpi->psnr.worst = 100.0;
2280   cpi->worst_ssim = 100.0;
2281
2282   cpi->count = 0;
2283   cpi->bytes = 0;
2284
2285   if (cpi->b_calculate_psnr) {
2286     cpi->total_sq_error = 0;
2287     cpi->total_samples = 0;
2288
2289     cpi->totalp_sq_error = 0;
2290     cpi->totalp_samples = 0;
2291
2292     cpi->tot_recode_hits = 0;
2293     cpi->summed_quality = 0;
2294     cpi->summed_weights = 0;
2295     cpi->summedp_quality = 0;
2296     cpi->summedp_weights = 0;
2297   }
2298
2299   cpi->fastssim.worst = 100.0;
2300
2301   cpi->psnrhvs.worst = 100.0;
2302
2303   if (cpi->b_calculate_blockiness) {
2304     cpi->total_blockiness = 0;
2305     cpi->worst_blockiness = 0.0;
2306   }
2307
2308   if (cpi->b_calculate_consistency) {
2309     CHECK_MEM_ERROR(cm, cpi->ssim_vars,
2310                     vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
2311                                sizeof(*cpi->ssim_vars) * 4));
2312     cpi->worst_consistency = 100.0;
2313   } else {
2314     cpi->ssim_vars = NULL;
2315   }
2316
2317 #endif
2318
2319   cpi->first_time_stamp_ever = INT64_MAX;
2320
2321   /*********************************************************************
2322    * Warning: Read the comments around 'cal_nmvjointsadcost' and       *
2323    * 'cal_nmvsadcosts' before modifying how these tables are computed. *
2324    *********************************************************************/
2325   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
2326   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
2327   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
2328   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
2329   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
2330   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
2331
2332   cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
2333   cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
2334   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
2335   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
2336   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
2337
2338 #if CONFIG_VP9_TEMPORAL_DENOISING
2339 #ifdef OUTPUT_YUV_DENOISED
2340   yuv_denoised_file = fopen("denoised.yuv", "ab");
2341 #endif
2342 #endif
2343 #ifdef OUTPUT_YUV_SKINMAP
2344   yuv_skinmap_file = fopen("skinmap.yuv", "wb");
2345 #endif
2346 #ifdef OUTPUT_YUV_REC
2347   yuv_rec_file = fopen("rec.yuv", "wb");
2348 #endif
2349 #ifdef OUTPUT_YUV_SVC_SRC
2350   yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
2351   yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
2352   yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
2353 #endif
2354
2355 #if 0
2356   framepsnr = fopen("framepsnr.stt", "a");
2357   kf_list = fopen("kf_list.stt", "w");
2358 #endif
2359
2360   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
2361
2362 #if !CONFIG_REALTIME_ONLY
2363   if (oxcf->pass == 1) {
2364     vp9_init_first_pass(cpi);
2365   } else if (oxcf->pass == 2) {
2366     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
2367     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
2368
2369     if (cpi->svc.number_spatial_layers > 1 ||
2370         cpi->svc.number_temporal_layers > 1) {
2371       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
2372       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 };
2373       int i;
2374
2375       for (i = 0; i < oxcf->ss_number_layers; ++i) {
2376         FIRSTPASS_STATS *const last_packet_for_layer =
2377             &stats[packets - oxcf->ss_number_layers + i];
2378         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
2379         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
2380         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
2381           int num_frames;
2382           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
2383
2384           vpx_free(lc->rc_twopass_stats_in.buf);
2385
2386           lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
2387           CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
2388                           vpx_malloc(lc->rc_twopass_stats_in.sz));
2389           lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
2390           lc->twopass.stats_in = lc->twopass.stats_in_start;
2391           lc->twopass.stats_in_end =
2392               lc->twopass.stats_in_start + packets_in_layer - 1;
2393           // Note the last packet is cumulative first pass stats.
2394           // So the number of frames is packet number minus one
2395           num_frames = packets_in_layer - 1;
2396           fps_init_first_pass_info(&lc->twopass.first_pass_info,
2397                                    lc->rc_twopass_stats_in.buf, num_frames);
2398           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
2399         }
2400       }
2401
2402       for (i = 0; i < packets; ++i) {
2403         const int layer_id = (int)stats[i].spatial_layer_id;
2404         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers &&
2405             stats_copy[layer_id] != NULL) {
2406           *stats_copy[layer_id] = stats[i];
2407           ++stats_copy[layer_id];
2408         }
2409       }
2410
2411       vp9_init_second_pass_spatial_svc(cpi);
2412     } else {
2413       int num_frames;
2414 #if CONFIG_FP_MB_STATS
2415       if (cpi->use_fp_mb_stats) {
2416         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
2417         const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
2418
2419         cpi->twopass.firstpass_mb_stats.mb_stats_start =
2420             oxcf->firstpass_mb_stats_in.buf;
2421         cpi->twopass.firstpass_mb_stats.mb_stats_end =
2422             cpi->twopass.firstpass_mb_stats.mb_stats_start +
2423             (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
2424       }
2425 #endif
2426
2427       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
2428       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
2429       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
2430       // Note the last packet is cumulative first pass stats.
2431       // So the number of frames is packet number minus one
2432       num_frames = packets - 1;
2433       fps_init_first_pass_info(&cpi->twopass.first_pass_info,
2434                                oxcf->two_pass_stats_in.buf, num_frames);
2435
2436       vp9_init_second_pass(cpi);
2437     }
2438   }
2439 #endif  // !CONFIG_REALTIME_ONLY
2440
2441   cpi->mb_wiener_var_cols = 0;
2442   cpi->mb_wiener_var_rows = 0;
2443   cpi->mb_wiener_variance = NULL;
2444
2445   vp9_set_speed_features_framesize_independent(cpi, oxcf->speed);
2446   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
2447
2448   {
2449     const int bsize = BLOCK_16X16;
2450     const int w = num_8x8_blocks_wide_lookup[bsize];
2451     const int h = num_8x8_blocks_high_lookup[bsize];
2452     const int num_cols = (cm->mi_cols + w - 1) / w;
2453     const int num_rows = (cm->mi_rows + h - 1) / h;
2454     CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
2455                     vpx_calloc(num_rows * num_cols,
2456                                sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
2457   }
2458
2459   cpi->kmeans_data_arr_alloc = 0;
2460 #if CONFIG_NON_GREEDY_MV
2461   cpi->tpl_ready = 0;
2462 #endif  // CONFIG_NON_GREEDY_MV
2463   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
2464
2465   // Allocate memory to store variances for a frame.
2466   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
2467   cpi->source_var_thresh = 0;
2468   cpi->frames_till_next_var_check = 0;
2469 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
2470   cpi->fn_ptr[BT].sdf = SDF;                             \
2471   cpi->fn_ptr[BT].sdaf = SDAF;                           \
2472   cpi->fn_ptr[BT].vf = VF;                               \
2473   cpi->fn_ptr[BT].svf = SVF;                             \
2474   cpi->fn_ptr[BT].svaf = SVAF;                           \
2475   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
2476   cpi->fn_ptr[BT].sdx8f = SDX8F;
2477
2478   // TODO(angiebird): make sdx8f available for every block size
2479   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
2480       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
2481       vpx_sad32x16x4d, NULL)
2482
2483   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
2484       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
2485       vpx_sad16x32x4d, NULL)
2486
2487   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
2488       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
2489       vpx_sad64x32x4d, NULL)
2490
2491   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
2492       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
2493       vpx_sad32x64x4d, NULL)
2494
2495   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
2496       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
2497       vpx_sad32x32x4d, vpx_sad32x32x8)
2498
2499   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
2500       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
2501       vpx_sad64x64x4d, NULL)
2502
2503   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
2504       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
2505       vpx_sad16x16x4d, vpx_sad16x16x8)
2506
2507   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
2508       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
2509       vpx_sad16x8x4d, vpx_sad16x8x8)
2510
2511   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
2512       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
2513       vpx_sad8x16x4d, vpx_sad8x16x8)
2514
2515   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
2516       vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
2517       vpx_sad8x8x8)
2518
2519   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
2520       vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
2521       NULL)
2522
2523   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
2524       vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
2525       NULL)
2526
2527   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
2528       vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
2529       vpx_sad4x4x8)
2530
2531 #if CONFIG_VP9_HIGHBITDEPTH
2532   highbd_set_var_fns(cpi);
2533 #endif
2534
2535   /* vp9_init_quantizer() is first called here. Add check in
2536    * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
2537    * called later when needed. This will avoid unnecessary calls of
2538    * vp9_init_quantizer() for every frame.
2539    */
2540   vp9_init_quantizer(cpi);
2541
2542   vp9_loop_filter_init(cm);
2543
2544   // Set up the unit scaling factor used during motion search.
2545 #if CONFIG_VP9_HIGHBITDEPTH
2546   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2547                                     cm->width, cm->height,
2548                                     cm->use_highbitdepth);
2549 #else
2550   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2551                                     cm->width, cm->height);
2552 #endif  // CONFIG_VP9_HIGHBITDEPTH
2553   cpi->td.mb.me_sf = &cpi->me_sf;
2554
2555   cm->error.setjmp = 0;
2556
2557   return cpi;
2558 }
2559
2560 #if CONFIG_INTERNAL_STATS
2561 #define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
2562
2563 #define SNPRINT2(H, T, V) \
2564   snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
2565 #endif  // CONFIG_INTERNAL_STATS
2566
2567 static void free_tpl_buffer(VP9_COMP *cpi);
2568
2569 void vp9_remove_compressor(VP9_COMP *cpi) {
2570   VP9_COMMON *cm;
2571   unsigned int i;
2572   int t;
2573
2574   if (!cpi) return;
2575
2576 #if CONFIG_INTERNAL_STATS
2577   vpx_free(cpi->ssim_vars);
2578 #endif
2579
2580   cm = &cpi->common;
2581   if (cm->current_video_frame > 0) {
2582 #if CONFIG_INTERNAL_STATS
2583     vpx_clear_system_state();
2584
2585     if (cpi->oxcf.pass != 1) {
2586       char headings[512] = { 0 };
2587       char results[512] = { 0 };
2588       FILE *f = fopen("opsnr.stt", "a");
2589       double time_encoded =
2590           (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
2591           10000000.000;
2592       double total_encode_time =
2593           (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
2594       const double dr =
2595           (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
2596       const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
2597       const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
2598       const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
2599
2600       if (cpi->b_calculate_psnr) {
2601         const double total_psnr = vpx_sse_to_psnr(
2602             (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
2603         const double totalp_psnr = vpx_sse_to_psnr(
2604             (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error);
2605         const double total_ssim =
2606             100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
2607         const double totalp_ssim =
2608             100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0);
2609
2610         snprintf(headings, sizeof(headings),
2611                  "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
2612                  "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
2613                  "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
2614                  "AVPsnrY\tAPsnrCb\tAPsnrCr");
2615         snprintf(results, sizeof(results),
2616                  "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2617                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2618                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2619                  "%7.3f\t%7.3f\t%7.3f",
2620                  dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
2621                  cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim,
2622                  totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count,
2623                  cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
2624                  cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
2625                  cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count,
2626                  cpi->psnr.stat[V] / cpi->count);
2627
2628         if (cpi->b_calculate_blockiness) {
2629           SNPRINT(headings, "\t  Block\tWstBlck");
2630           SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
2631           SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
2632         }
2633
2634         if (cpi->b_calculate_consistency) {
2635           double consistency =
2636               vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
2637                               (double)cpi->total_inconsistency);
2638
2639           SNPRINT(headings, "\tConsist\tWstCons");
2640           SNPRINT2(results, "\t%7.3f", consistency);
2641           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
2642         }
2643
2644         SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
2645         SNPRINT2(results, "\t%8.0f", total_encode_time);
2646         SNPRINT2(results, "\t%7.2f", rate_err);
2647         SNPRINT2(results, "\t%7.2f", fabs(rate_err));
2648
2649         fprintf(f, "%s\tAPsnr611\n", headings);
2650         fprintf(
2651             f, "%s\t%7.3f\n", results,
2652             (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
2653                 (cpi->count * 8));
2654       }
2655
2656       fclose(f);
2657     }
2658 #endif
2659
2660 #if 0
2661     {
2662       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
2663       printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
2664       printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
2665              cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
2666              cpi->time_compress_data / 1000,
2667              (cpi->time_receive_data + cpi->time_compress_data) / 1000);
2668     }
2669 #endif
2670   }
2671
2672 #if CONFIG_VP9_TEMPORAL_DENOISING
2673   vp9_denoiser_free(&(cpi->denoiser));
2674 #endif
2675
2676   if (cpi->kmeans_data_arr_alloc) {
2677 #if CONFIG_MULTITHREAD
2678     pthread_mutex_destroy(&cpi->kmeans_mutex);
2679 #endif
2680     vpx_free(cpi->kmeans_data_arr);
2681   }
2682
2683   free_tpl_buffer(cpi);
2684
2685   for (t = 0; t < cpi->num_workers; ++t) {
2686     VPxWorker *const worker = &cpi->workers[t];
2687     EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
2688
2689     // Deallocate allocated threads.
2690     vpx_get_worker_interface()->end(worker);
2691
2692     // Deallocate allocated thread data.
2693     if (t < cpi->num_workers - 1) {
2694       vpx_free(thread_data->td->counts);
2695       vp9_free_pc_tree(thread_data->td);
2696       vpx_free(thread_data->td);
2697     }
2698   }
2699   vpx_free(cpi->tile_thr_data);
2700   vpx_free(cpi->workers);
2701   vp9_row_mt_mem_dealloc(cpi);
2702
2703   if (cpi->num_workers > 1) {
2704     vp9_loop_filter_dealloc(&cpi->lf_row_sync);
2705     vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
2706   }
2707
2708 #if !CONFIG_REALTIME_ONLY
2709   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
2710 #endif
2711
2712   dealloc_compressor_data(cpi);
2713
2714   for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
2715        ++i) {
2716     vpx_free(cpi->mbgraph_stats[i].mb_stats);
2717   }
2718
2719 #if CONFIG_FP_MB_STATS
2720   if (cpi->use_fp_mb_stats) {
2721     vpx_free(cpi->twopass.frame_mb_stats_buf);
2722     cpi->twopass.frame_mb_stats_buf = NULL;
2723   }
2724 #endif
2725
2726   vp9_remove_common(cm);
2727   vp9_free_ref_frame_buffers(cm->buffer_pool);
2728 #if CONFIG_VP9_POSTPROC
2729   vp9_free_postproc_buffers(cm);
2730 #endif
2731   vpx_free(cpi);
2732
2733 #if CONFIG_VP9_TEMPORAL_DENOISING
2734 #ifdef OUTPUT_YUV_DENOISED
2735   fclose(yuv_denoised_file);
2736 #endif
2737 #endif
2738 #ifdef OUTPUT_YUV_SKINMAP
2739   fclose(yuv_skinmap_file);
2740 #endif
2741 #ifdef OUTPUT_YUV_REC
2742   fclose(yuv_rec_file);
2743 #endif
2744 #ifdef OUTPUT_YUV_SVC_SRC
2745   fclose(yuv_svc_src[0]);
2746   fclose(yuv_svc_src[1]);
2747   fclose(yuv_svc_src[2]);
2748 #endif
2749
2750 #if 0
2751
2752   if (keyfile)
2753     fclose(keyfile);
2754
2755   if (framepsnr)
2756     fclose(framepsnr);
2757
2758   if (kf_list)
2759     fclose(kf_list);
2760
2761 #endif
2762 }
2763
2764 int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
2765   if (is_psnr_calc_enabled(cpi)) {
2766 #if CONFIG_VP9_HIGHBITDEPTH
2767     vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
2768                          cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
2769 #else
2770     vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
2771 #endif
2772     return 1;
2773   } else {
2774     vp9_zero(*psnr);
2775     return 0;
2776   }
2777 }
2778
2779 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
2780   if (ref_frame_flags > 7) return -1;
2781
2782   cpi->ref_frame_flags = ref_frame_flags;
2783   return 0;
2784 }
2785
2786 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
2787   cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
2788   cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
2789   cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
2790   cpi->ext_refresh_frame_flags_pending = 1;
2791 }
2792
2793 static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
2794     VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
2795   MV_REFERENCE_FRAME ref_frame = NONE;
2796   if (ref_frame_flag == VP9_LAST_FLAG)
2797     ref_frame = LAST_FRAME;
2798   else if (ref_frame_flag == VP9_GOLD_FLAG)
2799     ref_frame = GOLDEN_FRAME;
2800   else if (ref_frame_flag == VP9_ALT_FLAG)
2801     ref_frame = ALTREF_FRAME;
2802
2803   return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
2804 }
2805
2806 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2807                            YV12_BUFFER_CONFIG *sd) {
2808   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2809   if (cfg) {
2810     vpx_yv12_copy_frame(cfg, sd);
2811     return 0;
2812   } else {
2813     return -1;
2814   }
2815 }
2816
2817 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2818                           YV12_BUFFER_CONFIG *sd) {
2819   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2820   if (cfg) {
2821     vpx_yv12_copy_frame(sd, cfg);
2822     return 0;
2823   } else {
2824     return -1;
2825   }
2826 }
2827
2828 int vp9_update_entropy(VP9_COMP *cpi, int update) {
2829   cpi->ext_refresh_frame_context = update;
2830   cpi->ext_refresh_frame_context_pending = 1;
2831   return 0;
2832 }
2833
2834 #ifdef OUTPUT_YUV_REC
2835 void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
2836   YV12_BUFFER_CONFIG *s = cm->frame_to_show;
2837   uint8_t *src = s->y_buffer;
2838   int h = cm->height;
2839
2840 #if CONFIG_VP9_HIGHBITDEPTH
2841   if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
2842     uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
2843
2844     do {
2845       fwrite(src16, s->y_width, 2, yuv_rec_file);
2846       src16 += s->y_stride;
2847     } while (--h);
2848
2849     src16 = CONVERT_TO_SHORTPTR(s->u_buffer);
2850     h = s->uv_height;
2851
2852     do {
2853       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2854       src16 += s->uv_stride;
2855     } while (--h);
2856
2857     src16 = CONVERT_TO_SHORTPTR(s->v_buffer);
2858     h = s->uv_height;
2859
2860     do {
2861       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2862       src16 += s->uv_stride;
2863     } while (--h);
2864
2865     fflush(yuv_rec_file);
2866     return;
2867   }
2868 #endif  // CONFIG_VP9_HIGHBITDEPTH
2869
2870   do {
2871     fwrite(src, s->y_width, 1, yuv_rec_file);
2872     src += s->y_stride;
2873   } while (--h);
2874
2875   src = s->u_buffer;
2876   h = s->uv_height;
2877
2878   do {
2879     fwrite(src, s->uv_width, 1, yuv_rec_file);
2880     src += s->uv_stride;
2881   } while (--h);
2882
2883   src = s->v_buffer;
2884   h = s->uv_height;
2885
2886   do {
2887     fwrite(src, s->uv_width, 1, yuv_rec_file);
2888     src += s->uv_stride;
2889   } while (--h);
2890
2891   fflush(yuv_rec_file);
2892 }
2893 #endif
2894
2895 #if CONFIG_VP9_HIGHBITDEPTH
2896 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
2897                                                 YV12_BUFFER_CONFIG *dst,
2898                                                 int bd) {
2899 #else
2900 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
2901                                                 YV12_BUFFER_CONFIG *dst) {
2902 #endif  // CONFIG_VP9_HIGHBITDEPTH
2903   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
2904   int i;
2905   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
2906                                    src->v_buffer };
2907   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
2908   const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
2909                               src->uv_crop_width };
2910   const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
2911                                src->uv_crop_height };
2912   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
2913   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
2914   const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
2915                               dst->uv_crop_width };
2916   const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
2917                                dst->uv_crop_height };
2918
2919   for (i = 0; i < MAX_MB_PLANE; ++i) {
2920 #if CONFIG_VP9_HIGHBITDEPTH
2921     if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
2922       vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
2923                               src_strides[i], dsts[i], dst_heights[i],
2924                               dst_widths[i], dst_strides[i], bd);
2925     } else {
2926       vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
2927                        dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
2928     }
2929 #else
2930     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
2931                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
2932 #endif  // CONFIG_VP9_HIGHBITDEPTH
2933   }
2934   vpx_extend_frame_borders(dst);
2935 }
2936
2937 #if CONFIG_VP9_HIGHBITDEPTH
2938 static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
2939                                    YV12_BUFFER_CONFIG *dst, int bd,
2940                                    INTERP_FILTER filter_type,
2941                                    int phase_scaler) {
2942   const int src_w = src->y_crop_width;
2943   const int src_h = src->y_crop_height;
2944   const int dst_w = dst->y_crop_width;
2945   const int dst_h = dst->y_crop_height;
2946   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
2947                                    src->v_buffer };
2948   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
2949   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
2950   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
2951   const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
2952   int x, y, i;
2953
2954   for (i = 0; i < MAX_MB_PLANE; ++i) {
2955     const int factor = (i == 0 || i == 3 ? 1 : 2);
2956     const int src_stride = src_strides[i];
2957     const int dst_stride = dst_strides[i];
2958     for (y = 0; y < dst_h; y += 16) {
2959       const int y_q4 = y * (16 / factor) * src_h / dst_h + phase_scaler;
2960       for (x = 0; x < dst_w; x += 16) {
2961         const int x_q4 = x * (16 / factor) * src_w / dst_w + phase_scaler;
2962         const uint8_t *src_ptr = srcs[i] +
2963                                  (y / factor) * src_h / dst_h * src_stride +
2964                                  (x / factor) * src_w / dst_w;
2965         uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
2966
2967         if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
2968           vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
2969                                CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, kernel,
2970                                x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
2971                                16 * src_h / dst_h, 16 / factor, 16 / factor,
2972                                bd);
2973         } else {
2974           vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
2975                         x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
2976                         16 * src_h / dst_h, 16 / factor, 16 / factor);
2977         }
2978       }
2979     }
2980   }
2981
2982   vpx_extend_frame_borders(dst);
2983 }
2984 #endif  // CONFIG_VP9_HIGHBITDEPTH
2985
2986 #if !CONFIG_REALTIME_ONLY
2987 static int scale_down(VP9_COMP *cpi, int q) {
2988   RATE_CONTROL *const rc = &cpi->rc;
2989   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
2990   int scale = 0;
2991   assert(frame_is_kf_gf_arf(cpi));
2992
2993   if (rc->frame_size_selector == UNSCALED &&
2994       q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
2995     const int max_size_thresh =
2996         (int)(rate_thresh_mult[SCALE_STEP1] *
2997               VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
2998     scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
2999   }
3000   return scale;
3001 }
3002
3003 static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
3004   const RATE_CONTROL *const rc = &cpi->rc;
3005   int big_miss_high;
3006
3007   if (frame_is_kf_gf_arf(cpi))
3008     big_miss_high = rc->this_frame_target * 3 / 2;
3009   else
3010     big_miss_high = rc->this_frame_target * 2;
3011
3012   return big_miss_high;
3013 }
3014
3015 static int big_rate_miss(VP9_COMP *cpi) {
3016   const RATE_CONTROL *const rc = &cpi->rc;
3017   int big_miss_high;
3018   int big_miss_low;
3019
3020   // Ignore for overlay frames
3021   if (rc->is_src_frame_alt_ref) {
3022     return 0;
3023   } else {
3024     big_miss_low = (rc->this_frame_target / 2);
3025     big_miss_high = big_rate_miss_high_threshold(cpi);
3026
3027     return (rc->projected_frame_size > big_miss_high) ||
3028            (rc->projected_frame_size < big_miss_low);
3029   }
3030 }
3031
3032 // test in two pass for the first
3033 static int two_pass_first_group_inter(VP9_COMP *cpi) {
3034   if (cpi->oxcf.pass == 2) {
3035     TWO_PASS *const twopass = &cpi->twopass;
3036     GF_GROUP *const gf_group = &twopass->gf_group;
3037     const int gfg_index = gf_group->index;
3038
3039     if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
3040     return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
3041            gf_group->update_type[gfg_index] == LF_UPDATE;
3042   } else {
3043     return 0;
3044   }
3045 }
3046
3047 // Function to test for conditions that indicate we should loop
3048 // back and recode a frame.
3049 static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
3050                             int maxq, int minq) {
3051   const RATE_CONTROL *const rc = &cpi->rc;
3052   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
3053   const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
3054   int force_recode = 0;
3055
3056   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3057       big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) ||
3058       (two_pass_first_group_inter(cpi) &&
3059        (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) ||
3060       (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) {
3061     if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
3062         scale_down(cpi, q)) {
3063       // Code this group at a lower resolution.
3064       cpi->resize_pending = 1;
3065       return 1;
3066     }
3067
3068     // Force recode for extreme overshoot.
3069     if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3070         (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
3071          rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) {
3072       return 1;
3073     }
3074
3075     // TODO(agrange) high_limit could be greater than the scale-down threshold.
3076     if ((rc->projected_frame_size > high_limit && q < maxq) ||
3077         (rc->projected_frame_size < low_limit && q > minq)) {
3078       force_recode = 1;
3079     } else if (cpi->oxcf.rc_mode == VPX_CQ) {
3080       // Deal with frame undershoot and whether or not we are
3081       // below the automatically set cq level.
3082       if (q > oxcf->cq_level &&
3083           rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
3084         force_recode = 1;
3085       }
3086     }
3087   }
3088   return force_recode;
3089 }
3090 #endif  // !CONFIG_REALTIME_ONLY
3091
3092 static void update_ref_frames(VP9_COMP *cpi) {
3093   VP9_COMMON *const cm = &cpi->common;
3094   BufferPool *const pool = cm->buffer_pool;
3095   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3096
3097   if (cpi->rc.show_arf_as_gld) {
3098     int tmp = cpi->alt_fb_idx;
3099     cpi->alt_fb_idx = cpi->gld_fb_idx;
3100     cpi->gld_fb_idx = tmp;
3101   } else if (cm->show_existing_frame) {
3102     // Pop ARF.
3103     cpi->lst_fb_idx = cpi->alt_fb_idx;
3104     cpi->alt_fb_idx =
3105         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3106     --gf_group->stack_size;
3107   }
3108
3109   // At this point the new frame has been encoded.
3110   // If any buffer copy / swapping is signaled it should be done here.
3111   if (cm->frame_type == KEY_FRAME) {
3112     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3113                cm->new_fb_idx);
3114     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3115                cm->new_fb_idx);
3116   } else if (vp9_preserve_existing_gf(cpi)) {
3117     // We have decided to preserve the previously existing golden frame as our
3118     // new ARF frame. However, in the short term in function
3119     // vp9_get_refresh_mask() we left it in the GF slot and, if
3120     // we're updating the GF with the current decoded frame, we save it to the
3121     // ARF slot instead.
3122     // We now have to update the ARF with the current frame and swap gld_fb_idx
3123     // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
3124     // slot and, if we're updating the GF, the current frame becomes the new GF.
3125     int tmp;
3126
3127     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3128                cm->new_fb_idx);
3129
3130     tmp = cpi->alt_fb_idx;
3131     cpi->alt_fb_idx = cpi->gld_fb_idx;
3132     cpi->gld_fb_idx = tmp;
3133   } else { /* For non key/golden frames */
3134     if (cpi->refresh_alt_ref_frame) {
3135       int arf_idx = gf_group->top_arf_idx;
3136
3137       // Push new ARF into stack.
3138       stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
3139                  gf_group->stack_size);
3140       ++gf_group->stack_size;
3141
3142       assert(arf_idx < REF_FRAMES);
3143
3144       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
3145       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
3146              cpi->interp_filter_selected[0],
3147              sizeof(cpi->interp_filter_selected[0]));
3148
3149       cpi->alt_fb_idx = arf_idx;
3150     }
3151
3152     if (cpi->refresh_golden_frame) {
3153       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3154                  cm->new_fb_idx);
3155       if (!cpi->rc.is_src_frame_alt_ref)
3156         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3157                cpi->interp_filter_selected[0],
3158                sizeof(cpi->interp_filter_selected[0]));
3159       else
3160         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3161                cpi->interp_filter_selected[ALTREF_FRAME],
3162                sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
3163     }
3164   }
3165
3166   if (cpi->refresh_last_frame) {
3167     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
3168                cm->new_fb_idx);
3169     if (!cpi->rc.is_src_frame_alt_ref)
3170       memcpy(cpi->interp_filter_selected[LAST_FRAME],
3171              cpi->interp_filter_selected[0],
3172              sizeof(cpi->interp_filter_selected[0]));
3173   }
3174
3175   if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
3176     cpi->alt_fb_idx =
3177         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3178     --gf_group->stack_size;
3179   }
3180 }
3181
3182 void vp9_update_reference_frames(VP9_COMP *cpi) {
3183   update_ref_frames(cpi);
3184
3185 #if CONFIG_VP9_TEMPORAL_DENOISING
3186   vp9_denoiser_update_ref_frame(cpi);
3187 #endif
3188
3189   if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
3190 }
3191
3192 static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
3193   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
3194   struct loopfilter *lf = &cm->lf;
3195   int is_reference_frame =
3196       (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
3197        cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
3198   if (cpi->use_svc &&
3199       cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
3200     is_reference_frame = !cpi->svc.non_reference_frame;
3201
3202   // Skip loop filter in show_existing_frame mode.
3203   if (cm->show_existing_frame) {
3204     lf->filter_level = 0;
3205     return;
3206   }
3207
3208   if (xd->lossless) {
3209     lf->filter_level = 0;
3210     lf->last_filt_level = 0;
3211   } else {
3212     struct vpx_usec_timer timer;
3213
3214     vpx_clear_system_state();
3215
3216     vpx_usec_timer_start(&timer);
3217
3218     if (!cpi->rc.is_src_frame_alt_ref) {
3219       if ((cpi->common.frame_type == KEY_FRAME) &&
3220           (!cpi->rc.this_key_frame_forced)) {
3221         lf->last_filt_level = 0;
3222       }
3223       vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
3224       lf->last_filt_level = lf->filter_level;
3225     } else {
3226       lf->filter_level = 0;
3227     }
3228
3229     vpx_usec_timer_mark(&timer);
3230     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
3231   }
3232
3233   if (lf->filter_level > 0 && is_reference_frame) {
3234     vp9_build_mask_frame(cm, lf->filter_level, 0);
3235
3236     if (cpi->num_workers > 1)
3237       vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
3238                                lf->filter_level, 0, 0, cpi->workers,
3239                                cpi->num_workers, &cpi->lf_row_sync);
3240     else
3241       vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
3242   }
3243
3244   vpx_extend_frame_inner_borders(cm->frame_to_show);
3245 }
3246
3247 static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
3248   RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
3249   if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
3250       new_fb_ptr->mi_cols < cm->mi_cols) {
3251     vpx_free(new_fb_ptr->mvs);
3252     CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
3253                     (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
3254                                          sizeof(*new_fb_ptr->mvs)));
3255     new_fb_ptr->mi_rows = cm->mi_rows;
3256     new_fb_ptr->mi_cols = cm->mi_cols;
3257   }
3258 }
3259
3260 void vp9_scale_references(VP9_COMP *cpi) {
3261   VP9_COMMON *cm = &cpi->common;
3262   MV_REFERENCE_FRAME ref_frame;
3263   const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG,
3264                                      VP9_ALT_FLAG };
3265
3266   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3267     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
3268     if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
3269       BufferPool *const pool = cm->buffer_pool;
3270       const YV12_BUFFER_CONFIG *const ref =
3271           get_ref_frame_buffer(cpi, ref_frame);
3272
3273       if (ref == NULL) {
3274         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3275         continue;
3276       }
3277
3278 #if CONFIG_VP9_HIGHBITDEPTH
3279       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3280         RefCntBuffer *new_fb_ptr = NULL;
3281         int force_scaling = 0;
3282         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3283         if (new_fb == INVALID_IDX) {
3284           new_fb = get_free_fb(cm);
3285           force_scaling = 1;
3286         }
3287         if (new_fb == INVALID_IDX) return;
3288         new_fb_ptr = &pool->frame_bufs[new_fb];
3289         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3290             new_fb_ptr->buf.y_crop_height != cm->height) {
3291           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3292                                        cm->subsampling_x, cm->subsampling_y,
3293                                        cm->use_highbitdepth,
3294                                        VP9_ENC_BORDER_IN_PIXELS,
3295                                        cm->byte_alignment, NULL, NULL, NULL))
3296             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3297                                "Failed to allocate frame buffer");
3298           scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth,
3299                                  EIGHTTAP, 0);
3300           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3301           alloc_frame_mvs(cm, new_fb);
3302         }
3303 #else
3304       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3305         RefCntBuffer *new_fb_ptr = NULL;
3306         int force_scaling = 0;
3307         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3308         if (new_fb == INVALID_IDX) {
3309           new_fb = get_free_fb(cm);
3310           force_scaling = 1;
3311         }
3312         if (new_fb == INVALID_IDX) return;
3313         new_fb_ptr = &pool->frame_bufs[new_fb];
3314         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3315             new_fb_ptr->buf.y_crop_height != cm->height) {
3316           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3317                                        cm->subsampling_x, cm->subsampling_y,
3318                                        VP9_ENC_BORDER_IN_PIXELS,
3319                                        cm->byte_alignment, NULL, NULL, NULL))
3320             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3321                                "Failed to allocate frame buffer");
3322           vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
3323           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3324           alloc_frame_mvs(cm, new_fb);
3325         }
3326 #endif  // CONFIG_VP9_HIGHBITDEPTH
3327       } else {
3328         int buf_idx;
3329         RefCntBuffer *buf = NULL;
3330         if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3331           // Check for release of scaled reference.
3332           buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
3333           if (buf_idx != INVALID_IDX) {
3334             buf = &pool->frame_bufs[buf_idx];
3335             --buf->ref_count;
3336             cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3337           }
3338         }
3339         buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3340         buf = &pool->frame_bufs[buf_idx];
3341         buf->buf.y_crop_width = ref->y_crop_width;
3342         buf->buf.y_crop_height = ref->y_crop_height;
3343         cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
3344         ++buf->ref_count;
3345       }
3346     } else {
3347       if (cpi->oxcf.pass != 0 || cpi->use_svc)
3348         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3349     }
3350   }
3351 }
3352
3353 static void release_scaled_references(VP9_COMP *cpi) {
3354   VP9_COMMON *cm = &cpi->common;
3355   int i;
3356   if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3357     // Only release scaled references under certain conditions:
3358     // if reference will be updated, or if scaled reference has same resolution.
3359     int refresh[3];
3360     refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
3361     refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
3362     refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
3363     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3364       const int idx = cpi->scaled_ref_idx[i - 1];
3365       if (idx != INVALID_IDX) {
3366         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3367         const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
3368         if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
3369                                buf->buf.y_crop_height == ref->y_crop_height)) {
3370           --buf->ref_count;
3371           cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
3372         }
3373       }
3374     }
3375   } else {
3376     for (i = 0; i < REFS_PER_FRAME; ++i) {
3377       const int idx = cpi->scaled_ref_idx[i];
3378       if (idx != INVALID_IDX) {
3379         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3380         --buf->ref_count;
3381         cpi->scaled_ref_idx[i] = INVALID_IDX;
3382       }
3383     }
3384   }
3385 }
3386
3387 static void full_to_model_count(unsigned int *model_count,
3388                                 unsigned int *full_count) {
3389   int n;
3390   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
3391   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
3392   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
3393   for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
3394     model_count[TWO_TOKEN] += full_count[n];
3395   model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
3396 }
3397
3398 static void full_to_model_counts(vp9_coeff_count_model *model_count,
3399                                  vp9_coeff_count *full_count) {
3400   int i, j, k, l;
3401
3402   for (i = 0; i < PLANE_TYPES; ++i)
3403     for (j = 0; j < REF_TYPES; ++j)
3404       for (k = 0; k < COEF_BANDS; ++k)
3405         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
3406           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
3407 }
3408
3409 #if 0 && CONFIG_INTERNAL_STATS
3410 static void output_frame_level_debug_stats(VP9_COMP *cpi) {
3411   VP9_COMMON *const cm = &cpi->common;
3412   FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
3413   int64_t recon_err;
3414
3415   vpx_clear_system_state();
3416
3417 #if CONFIG_VP9_HIGHBITDEPTH
3418   if (cm->use_highbitdepth) {
3419     recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3420   } else {
3421     recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3422   }
3423 #else
3424   recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
3425 #endif  // CONFIG_VP9_HIGHBITDEPTH
3426
3427
3428   if (cpi->twopass.total_left_stats.coded_error != 0.0) {
3429     double dc_quant_devisor;
3430 #if CONFIG_VP9_HIGHBITDEPTH
3431     switch (cm->bit_depth) {
3432       case VPX_BITS_8:
3433         dc_quant_devisor = 4.0;
3434         break;
3435       case VPX_BITS_10:
3436         dc_quant_devisor = 16.0;
3437         break;
3438       default:
3439         assert(cm->bit_depth == VPX_BITS_12);
3440         dc_quant_devisor = 64.0;
3441         break;
3442     }
3443 #else
3444     dc_quant_devisor = 4.0;
3445 #endif
3446
3447     if (!cm->current_video_frame) {
3448       fprintf(f, "frame, width, height, last ts, last end ts, "
3449           "source_alt_ref_pending, source_alt_ref_active, "
3450           "this_frame_target, projected_frame_size, "
3451           "projected_frame_size / MBs, "
3452           "projected_frame_size - this_frame_target, "
3453           "vbr_bits_off_target, vbr_bits_off_target_fast, "
3454           "twopass.extend_minq, twopass.extend_minq_fast, "
3455           "total_target_vs_actual, "
3456           "starting_buffer_level - bits_off_target, "
3457           "total_actual_bits, base_qindex, q for base_qindex, "
3458           "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
3459           "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
3460           "frame_type, gfu_boost, "
3461           "twopass.bits_left, "
3462           "twopass.total_left_stats.coded_error, "
3463           "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
3464           "tot_recode_hits, recon_err, kf_boost, "
3465           "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
3466           "filter_level, seg.aq_av_offset\n");
3467     }
3468
3469     fprintf(f, "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
3470         "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
3471         "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
3472         "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
3473         "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
3474         cpi->common.current_video_frame,
3475         cm->width, cm->height,
3476         cpi->last_time_stamp_seen,
3477         cpi->last_end_time_stamp_seen,
3478         cpi->rc.source_alt_ref_pending,
3479         cpi->rc.source_alt_ref_active,
3480         cpi->rc.this_frame_target,
3481         cpi->rc.projected_frame_size,
3482         cpi->rc.projected_frame_size / cpi->common.MBs,
3483         (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
3484         cpi->rc.vbr_bits_off_target,
3485         cpi->rc.vbr_bits_off_target_fast,
3486         cpi->twopass.extend_minq,
3487         cpi->twopass.extend_minq_fast,
3488         cpi->rc.total_target_vs_actual,
3489         (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
3490         cpi->rc.total_actual_bits, cm->base_qindex,
3491         vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
3492         (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
3493             dc_quant_devisor,
3494         vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
3495                                 cm->bit_depth),
3496         cpi->rc.avg_q,
3497         vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
3498         cpi->refresh_last_frame, cpi->refresh_golden_frame,
3499         cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
3500         cpi->twopass.bits_left,
3501         cpi->twopass.total_left_stats.coded_error,
3502         cpi->twopass.bits_left /
3503             (1 + cpi->twopass.total_left_stats.coded_error),
3504         cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
3505         cpi->twopass.kf_zeromotion_pct,
3506         cpi->twopass.fr_content_type,
3507         cm->lf.filter_level,
3508         cm->seg.aq_av_offset);
3509   }
3510   fclose(f);
3511
3512   if (0) {
3513     FILE *const fmodes = fopen("Modes.stt", "a");
3514     int i;
3515
3516     fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
3517             cm->frame_type, cpi->refresh_golden_frame,
3518             cpi->refresh_alt_ref_frame);
3519
3520     for (i = 0; i < MAX_MODES; ++i)
3521       fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
3522
3523     fprintf(fmodes, "\n");
3524
3525     fclose(fmodes);
3526   }
3527 }
3528 #endif
3529
3530 static void set_mv_search_params(VP9_COMP *cpi) {
3531   const VP9_COMMON *const cm = &cpi->common;
3532   const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
3533
3534   // Default based on max resolution.
3535   cpi->mv_step_param = vp9_init_search_range(max_mv_def);
3536
3537   if (cpi->sf.mv.auto_mv_step_size) {
3538     if (frame_is_intra_only(cm)) {
3539       // Initialize max_mv_magnitude for use in the first INTER frame
3540       // after a key/intra-only frame.
3541       cpi->max_mv_magnitude = max_mv_def;
3542     } else {
3543       if (cm->show_frame) {
3544         // Allow mv_steps to correspond to twice the max mv magnitude found
3545         // in the previous frame, capped by the default max_mv_magnitude based
3546         // on resolution.
3547         cpi->mv_step_param = vp9_init_search_range(
3548             VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
3549       }
3550       cpi->max_mv_magnitude = 0;
3551     }
3552   }
3553 }
3554
3555 static void set_size_independent_vars(VP9_COMP *cpi) {
3556   vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
3557   vp9_set_rd_speed_thresholds(cpi);
3558   vp9_set_rd_speed_thresholds_sub8x8(cpi);
3559   cpi->common.interp_filter = cpi->sf.default_interp_filter;
3560 }
3561
3562 static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
3563                                     int *top_index) {
3564   VP9_COMMON *const cm = &cpi->common;
3565
3566   // Setup variables that depend on the dimensions of the frame.
3567   vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
3568
3569   // Decide q and q bounds.
3570   *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
3571
3572   if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
3573     *q = cpi->rc.worst_quality;
3574     cpi->rc.force_max_q = 0;
3575   }
3576
3577   if (!frame_is_intra_only(cm)) {
3578     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
3579   }
3580
3581 #if !CONFIG_REALTIME_ONLY
3582   // Configure experimental use of segmentation for enhanced coding of
3583   // static regions if indicated.
3584   // Only allowed in the second pass of a two pass encode, as it requires
3585   // lagged coding, and if the relevant speed feature flag is set.
3586   if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
3587     configure_static_seg_features(cpi);
3588 #endif  // !CONFIG_REALTIME_ONLY
3589
3590 #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
3591   if (cpi->oxcf.noise_sensitivity > 0) {
3592     int l = 0;
3593     switch (cpi->oxcf.noise_sensitivity) {
3594       case 1: l = 20; break;
3595       case 2: l = 40; break;
3596       case 3: l = 60; break;
3597       case 4:
3598       case 5: l = 100; break;
3599       case 6: l = 150; break;
3600     }
3601     if (!cpi->common.postproc_state.limits) {
3602       cpi->common.postproc_state.limits =
3603           vpx_calloc(cpi->un_scaled_source->y_width,
3604                      sizeof(*cpi->common.postproc_state.limits));
3605     }
3606     vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
3607                 cpi->common.postproc_state.limits);
3608   }
3609 #endif  // CONFIG_VP9_POSTPROC
3610 }
3611
3612 #if CONFIG_VP9_TEMPORAL_DENOISING
3613 static void setup_denoiser_buffer(VP9_COMP *cpi) {
3614   VP9_COMMON *const cm = &cpi->common;
3615   if (cpi->oxcf.noise_sensitivity > 0 &&
3616       !cpi->denoiser.frame_buffer_initialized) {
3617     if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
3618                            cpi->oxcf.noise_sensitivity, cm->width, cm->height,
3619                            cm->subsampling_x, cm->subsampling_y,
3620 #if CONFIG_VP9_HIGHBITDEPTH
3621                            cm->use_highbitdepth,
3622 #endif
3623                            VP9_ENC_BORDER_IN_PIXELS))
3624       vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3625                          "Failed to allocate denoiser");
3626   }
3627 }
3628 #endif
3629
3630 static void init_motion_estimation(VP9_COMP *cpi) {
3631   int y_stride = cpi->scaled_source.y_stride;
3632
3633   if (cpi->sf.mv.search_method == NSTEP) {
3634     vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
3635   } else if (cpi->sf.mv.search_method == DIAMOND) {
3636     vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
3637   }
3638 }
3639
3640 static void set_frame_size(VP9_COMP *cpi) {
3641   int ref_frame;
3642   VP9_COMMON *const cm = &cpi->common;
3643   VP9EncoderConfig *const oxcf = &cpi->oxcf;
3644   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
3645
3646 #if !CONFIG_REALTIME_ONLY
3647   if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
3648       ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
3649        (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
3650     calculate_coded_size(cpi, &oxcf->scaled_frame_width,
3651                          &oxcf->scaled_frame_height);
3652
3653     // There has been a change in frame size.
3654     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3655                          oxcf->scaled_frame_height);
3656   }
3657 #endif  // !CONFIG_REALTIME_ONLY
3658
3659   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && !cpi->use_svc &&
3660       oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
3661     oxcf->scaled_frame_width =
3662         (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
3663     oxcf->scaled_frame_height =
3664         (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den;
3665     // There has been a change in frame size.
3666     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3667                          oxcf->scaled_frame_height);
3668
3669     // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
3670     set_mv_search_params(cpi);
3671
3672     vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
3673 #if CONFIG_VP9_TEMPORAL_DENOISING
3674     // Reset the denoiser on the resized frame.
3675     if (cpi->oxcf.noise_sensitivity > 0) {
3676       vp9_denoiser_free(&(cpi->denoiser));
3677       setup_denoiser_buffer(cpi);
3678       // Dynamic resize is only triggered for non-SVC, so we can force
3679       // golden frame update here as temporary fix to denoiser.
3680       cpi->refresh_golden_frame = 1;
3681     }
3682 #endif
3683   }
3684
3685   if ((oxcf->pass == 2) && !cpi->use_svc) {
3686     vp9_set_target_rate(cpi);
3687   }
3688
3689   alloc_frame_mvs(cm, cm->new_fb_idx);
3690
3691   // Reset the frame pointers to the current frame size.
3692   if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
3693                                cm->subsampling_x, cm->subsampling_y,
3694 #if CONFIG_VP9_HIGHBITDEPTH
3695                                cm->use_highbitdepth,
3696 #endif
3697                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
3698                                NULL, NULL, NULL))
3699     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3700                        "Failed to allocate frame buffer");
3701
3702   alloc_util_frame_buffers(cpi);
3703   init_motion_estimation(cpi);
3704
3705   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3706     RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
3707     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3708
3709     ref_buf->idx = buf_idx;
3710
3711     if (buf_idx != INVALID_IDX) {
3712       YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
3713       ref_buf->buf = buf;
3714 #if CONFIG_VP9_HIGHBITDEPTH
3715       vp9_setup_scale_factors_for_frame(
3716           &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
3717           cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
3718 #else
3719       vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
3720                                         buf->y_crop_height, cm->width,
3721                                         cm->height);
3722 #endif  // CONFIG_VP9_HIGHBITDEPTH
3723       if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
3724     } else {
3725       ref_buf->buf = NULL;
3726     }
3727   }
3728
3729   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
3730 }
3731
3732 #if CONFIG_CONSISTENT_RECODE
3733 static void save_encode_params(VP9_COMP *cpi) {
3734   VP9_COMMON *const cm = &cpi->common;
3735   const int tile_cols = 1 << cm->log2_tile_cols;
3736   const int tile_rows = 1 << cm->log2_tile_rows;
3737   int tile_col, tile_row;
3738   int i, j;
3739   RD_OPT *rd_opt = &cpi->rd;
3740   for (i = 0; i < MAX_REF_FRAMES; i++) {
3741     for (j = 0; j < REFERENCE_MODES; j++)
3742       rd_opt->prediction_type_threshes_prev[i][j] =
3743           rd_opt->prediction_type_threshes[i][j];
3744
3745     for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
3746       rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
3747   }
3748
3749   if (cpi->tile_data != NULL) {
3750     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
3751       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
3752         TileDataEnc *tile_data =
3753             &cpi->tile_data[tile_row * tile_cols + tile_col];
3754         for (i = 0; i < BLOCK_SIZES; ++i) {
3755           for (j = 0; j < MAX_MODES; ++j) {
3756             tile_data->thresh_freq_fact_prev[i][j] =
3757                 tile_data->thresh_freq_fact[i][j];
3758           }
3759         }
3760       }
3761   }
3762 }
3763 #endif
3764
3765 static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
3766 #ifdef ENABLE_KF_DENOISE
3767   if (is_spatial_denoise_enabled(cpi)) {
3768     cpi->raw_source_frame = vp9_scale_if_required(
3769         cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3770         (oxcf->pass == 0), EIGHTTAP, 0);
3771   } else {
3772     cpi->raw_source_frame = cpi->Source;
3773   }
3774 #else
3775   cpi->raw_source_frame = cpi->Source;
3776 #endif
3777 }
3778
3779 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
3780                                       uint8_t *dest) {
3781   VP9_COMMON *const cm = &cpi->common;
3782   SVC *const svc = &cpi->svc;
3783   int q = 0, bottom_index = 0, top_index = 0;
3784   int no_drop_scene_change = 0;
3785   const INTERP_FILTER filter_scaler =
3786       (is_one_pass_cbr_svc(cpi))
3787           ? svc->downsample_filter_type[svc->spatial_layer_id]
3788           : EIGHTTAP;
3789   const int phase_scaler =
3790       (is_one_pass_cbr_svc(cpi))
3791           ? svc->downsample_filter_phase[svc->spatial_layer_id]
3792           : 0;
3793
3794   if (cm->show_existing_frame) {
3795     cpi->rc.this_frame_target = 0;
3796     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
3797     return 1;
3798   }
3799
3800   svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
3801
3802   // Flag to check if its valid to compute the source sad (used for
3803   // scene detection and for superblock content state in CBR mode).
3804   // The flag may get reset below based on SVC or resizing state.
3805   cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;
3806
3807   vpx_clear_system_state();
3808
3809   set_frame_size(cpi);
3810
3811   if (is_one_pass_cbr_svc(cpi) &&
3812       cpi->un_scaled_source->y_width == cm->width << 2 &&
3813       cpi->un_scaled_source->y_height == cm->height << 2 &&
3814       svc->scaled_temp.y_width == cm->width << 1 &&
3815       svc->scaled_temp.y_height == cm->height << 1) {
3816     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
3817     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
3818     // result will be saved in scaled_temp and might be used later.
3819     const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
3820     const int phase_scaler2 = svc->downsample_filter_phase[1];
3821     cpi->Source = vp9_svc_twostage_scale(
3822         cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
3823         filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
3824     svc->scaled_one_half = 1;
3825   } else if (is_one_pass_cbr_svc(cpi) &&
3826              cpi->un_scaled_source->y_width == cm->width << 1 &&
3827              cpi->un_scaled_source->y_height == cm->height << 1 &&
3828              svc->scaled_one_half) {
3829     // If the spatial layer is 1/2x1/2 and the scaling is already done in the
3830     // two-stage scaling, use the result directly.
3831     cpi->Source = &svc->scaled_temp;
3832     svc->scaled_one_half = 0;
3833   } else {
3834     cpi->Source = vp9_scale_if_required(
3835         cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
3836         filter_scaler, phase_scaler);
3837   }
3838 #ifdef OUTPUT_YUV_SVC_SRC
3839   // Write out at most 3 spatial layers.
3840   if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
3841     vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
3842   }
3843 #endif
3844   // Unfiltered raw source used in metrics calculation if the source
3845   // has been filtered.
3846   if (is_psnr_calc_enabled(cpi)) {
3847 #ifdef ENABLE_KF_DENOISE
3848     if (is_spatial_denoise_enabled(cpi)) {
3849       cpi->raw_source_frame = vp9_scale_if_required(
3850           cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3851           (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
3852     } else {
3853       cpi->raw_source_frame = cpi->Source;
3854     }
3855 #else
3856     cpi->raw_source_frame = cpi->Source;
3857 #endif
3858   }
3859
3860   if ((cpi->use_svc &&
3861        (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
3862         svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
3863         svc->current_superframe < 1)) ||
3864       cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
3865       cpi->resize_state != ORIG) {
3866     cpi->compute_source_sad_onepass = 0;
3867     if (cpi->content_state_sb_fd != NULL)
3868       memset(cpi->content_state_sb_fd, 0,
3869              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
3870                  sizeof(*cpi->content_state_sb_fd));
3871   }
3872
3873   // Avoid scaling last_source unless its needed.
3874   // Last source is needed if avg_source_sad() is used, or if
3875   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
3876   // estimation is enabled.
3877   if (cpi->unscaled_last_source != NULL &&
3878       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3879        (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
3880         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
3881        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
3882        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
3883        cpi->compute_source_sad_onepass))
3884     cpi->Last_Source = vp9_scale_if_required(
3885         cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
3886         (cpi->oxcf.pass == 0), EIGHTTAP, 0);
3887
3888   if (cpi->Last_Source == NULL ||
3889       cpi->Last_Source->y_width != cpi->Source->y_width ||
3890       cpi->Last_Source->y_height != cpi->Source->y_height)
3891     cpi->compute_source_sad_onepass = 0;
3892
3893   if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
3894     memset(cpi->consec_zero_mv, 0,
3895            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
3896   }
3897
3898 #if CONFIG_VP9_TEMPORAL_DENOISING
3899   if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
3900     vp9_denoiser_reset_on_first_frame(cpi);
3901 #endif
3902
3903   // Scene detection is always used for VBR mode or screen-content case.
3904   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
3905   // (need to check encoding time cost for doing this for speed 8).
3906   cpi->rc.high_source_sad = 0;
3907   cpi->rc.hybrid_intra_scene_change = 0;
3908   cpi->rc.re_encode_maxq_scene_change = 0;
3909   if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
3910       (cpi->oxcf.rc_mode == VPX_VBR ||
3911        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3912        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
3913     vp9_scene_detection_onepass(cpi);
3914
3915   if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
3916     svc->high_source_sad_superframe = cpi->rc.high_source_sad;
3917     svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
3918     // On scene change reset temporal layer pattern to TL0.
3919     // Note that if the base/lower spatial layers are skipped: instead of
3920     // inserting base layer here, we force max-q for the next superframe
3921     // with lower spatial layers: this is done in vp9_encodedframe_overshoot()
3922     // when max-q is decided for the current layer.
3923     // Only do this reset for bypass/flexible mode.
3924     if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
3925         svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
3926       // rc->high_source_sad will get reset so copy it to restore it.
3927       int tmp_high_source_sad = cpi->rc.high_source_sad;
3928       vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
3929       cpi->rc.high_source_sad = tmp_high_source_sad;
3930     }
3931   }
3932
3933   vp9_update_noise_estimate(cpi);
3934
3935   // For 1 pass CBR, check if we are dropping this frame.
3936   // Never drop on key frame, if base layer is key for svc,
3937   // on scene change, or if superframe has layer sync.
3938   if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
3939       !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
3940     no_drop_scene_change = 1;
3941   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
3942       !frame_is_intra_only(cm) && !no_drop_scene_change &&
3943       !svc->superframe_has_layer_sync &&
3944       (!cpi->use_svc ||
3945        !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
3946     if (vp9_rc_drop_frame(cpi)) return 0;
3947   }
3948
3949   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
3950   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
3951   // avoid this frame-level upsampling (for non intra_only frames).
3952   if (frame_is_intra_only(cm) == 0 &&
3953       !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) {
3954     vp9_scale_references(cpi);
3955   }
3956
3957   set_size_independent_vars(cpi);
3958   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
3959
3960   // search method and step parameter might be changed in speed settings.
3961   init_motion_estimation(cpi);
3962
3963   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
3964
3965   if (cpi->sf.svc_use_lowres_part &&
3966       svc->spatial_layer_id == svc->number_spatial_layers - 2) {
3967     if (svc->prev_partition_svc == NULL) {
3968       CHECK_MEM_ERROR(
3969           cm, svc->prev_partition_svc,
3970           (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
3971                                    sizeof(*svc->prev_partition_svc)));
3972     }
3973   }
3974
3975   // TODO(jianj): Look into issue of skin detection with high bitdepth.
3976   if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
3977       cpi->oxcf.rc_mode == VPX_CBR &&
3978       cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
3979       cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
3980     cpi->use_skin_detection = 1;
3981   }
3982
3983   // Enable post encode frame dropping for CBR on non key frame, when
3984   // ext_use_post_encode_drop is specified by user.
3985   cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
3986                                  cpi->oxcf.rc_mode == VPX_CBR &&
3987                                  cm->frame_type != KEY_FRAME;
3988
3989   vp9_set_quantizer(cm, q);
3990   vp9_set_variance_partition_thresholds(cpi, q, 0);
3991
3992   setup_frame(cpi);
3993
3994   suppress_active_map(cpi);
3995
3996   if (cpi->use_svc) {
3997     // On non-zero spatial layer, check for disabling inter-layer
3998     // prediction.
3999     if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
4000     vp9_svc_assert_constraints_pattern(cpi);
4001   }
4002
4003   if (cpi->rc.last_post_encode_dropped_scene_change) {
4004     cpi->rc.high_source_sad = 1;
4005     svc->high_source_sad_superframe = 1;
4006     // For now disable use_source_sad since Last_Source will not be the previous
4007     // encoded but the dropped one.
4008     cpi->sf.use_source_sad = 0;
4009     cpi->rc.last_post_encode_dropped_scene_change = 0;
4010   }
4011   // Check if this high_source_sad (scene/slide change) frame should be
4012   // encoded at high/max QP, and if so, set the q and adjust some rate
4013   // control parameters.
4014   if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
4015       (cpi->rc.high_source_sad ||
4016        (cpi->use_svc && svc->high_source_sad_superframe))) {
4017     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
4018       vp9_set_quantizer(cm, q);
4019       vp9_set_variance_partition_thresholds(cpi, q, 0);
4020     }
4021   }
4022
4023 #if !CONFIG_REALTIME_ONLY
4024   // Variance adaptive and in frame q adjustment experiments are mutually
4025   // exclusive.
4026   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
4027     vp9_vaq_frame_setup(cpi);
4028   } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
4029     vp9_360aq_frame_setup(cpi);
4030   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
4031     vp9_setup_in_frame_q_adj(cpi);
4032   } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
4033     // it may be pretty bad for rate-control,
4034     // and I should handle it somehow
4035     vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4036   } else {
4037 #endif
4038     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4039       vp9_cyclic_refresh_setup(cpi);
4040     } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
4041       apply_roi_map(cpi);
4042     }
4043 #if !CONFIG_REALTIME_ONLY
4044   }
4045 #endif
4046
4047   apply_active_map(cpi);
4048
4049   vp9_encode_frame(cpi);
4050
4051   // Check if we should re-encode this frame at high Q because of high
4052   // overshoot based on the encoded frame size. Only for frames where
4053   // high temporal-source SAD is detected.
4054   // For SVC: all spatial layers are checked for re-encoding.
4055   if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
4056       (cpi->rc.high_source_sad ||
4057        (cpi->use_svc && svc->high_source_sad_superframe))) {
4058     int frame_size = 0;
4059     // Get an estimate of the encoded frame size.
4060     save_coding_context(cpi);
4061     vp9_pack_bitstream(cpi, dest, size);
4062     restore_coding_context(cpi);
4063     frame_size = (int)(*size) << 3;
4064     // Check if encoded frame will overshoot too much, and if so, set the q and
4065     // adjust some rate control parameters, and return to re-encode the frame.
4066     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
4067       vpx_clear_system_state();
4068       vp9_set_quantizer(cm, q);
4069       vp9_set_variance_partition_thresholds(cpi, q, 0);
4070       suppress_active_map(cpi);
4071       // Turn-off cyclic refresh for re-encoded frame.
4072       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4073         CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
4074         unsigned char *const seg_map = cpi->segmentation_map;
4075         memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
4076         memset(cr->last_coded_q_map, MAXQ,
4077                cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
4078         cr->sb_index = 0;
4079         vp9_disable_segmentation(&cm->seg);
4080       }
4081       apply_active_map(cpi);
4082       vp9_encode_frame(cpi);
4083     }
4084   }
4085
4086   // Update some stats from cyclic refresh, and check for golden frame update.
4087   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
4088       !frame_is_intra_only(cm))
4089     vp9_cyclic_refresh_postencode(cpi);
4090
4091   // Update the skip mb flag probabilities based on the distribution
4092   // seen in the last encoder iteration.
4093   // update_base_skip_probs(cpi);
4094   vpx_clear_system_state();
4095   return 1;
4096 }
4097
4098 #if !CONFIG_REALTIME_ONLY
4099 #define MAX_QSTEP_ADJ 4
4100 static int get_qstep_adj(int rate_excess, int rate_limit) {
4101   int qstep =
4102       rate_limit ? ((rate_excess + rate_limit / 2) / rate_limit) : INT_MAX;
4103   return VPXMIN(qstep, MAX_QSTEP_ADJ);
4104 }
4105
4106 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
4107                                     uint8_t *dest) {
4108   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4109   VP9_COMMON *const cm = &cpi->common;
4110   RATE_CONTROL *const rc = &cpi->rc;
4111   int bottom_index, top_index;
4112   int loop_count = 0;
4113   int loop_at_this_size = 0;
4114   int loop = 0;
4115   int overshoot_seen = 0;
4116   int undershoot_seen = 0;
4117   int frame_over_shoot_limit;
4118   int frame_under_shoot_limit;
4119   int q = 0, q_low = 0, q_high = 0;
4120   int enable_acl;
4121 #ifdef AGGRESSIVE_VBR
4122   int qrange_adj = 1;
4123 #endif
4124
4125   if (cm->show_existing_frame) {
4126     rc->this_frame_target = 0;
4127     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
4128     return;
4129   }
4130
4131   set_size_independent_vars(cpi);
4132
4133   enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
4134                                        (cpi->twopass.gf_group.index == 1)
4135                                  : 0;
4136
4137   do {
4138     vpx_clear_system_state();
4139
4140     set_frame_size(cpi);
4141
4142     if (loop_count == 0 || cpi->resize_pending != 0) {
4143       set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4144
4145 #ifdef AGGRESSIVE_VBR
4146       if (two_pass_first_group_inter(cpi)) {
4147         // Adjustment limits for min and max q
4148         qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2);
4149
4150         bottom_index =
4151             VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q);
4152         top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2);
4153       }
4154 #endif
4155       // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
4156       set_mv_search_params(cpi);
4157
4158       // Reset the loop state for new frame size.
4159       overshoot_seen = 0;
4160       undershoot_seen = 0;
4161
4162       // Reconfiguration for change in frame size has concluded.
4163       cpi->resize_pending = 0;
4164
4165       q_low = bottom_index;
4166       q_high = top_index;
4167
4168       loop_at_this_size = 0;
4169     }
4170
4171     // Decide frame size bounds first time through.
4172     if (loop_count == 0) {
4173       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
4174                                        &frame_under_shoot_limit,
4175                                        &frame_over_shoot_limit);
4176     }
4177
4178     cpi->Source =
4179         vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
4180                               (oxcf->pass == 0), EIGHTTAP, 0);
4181
4182     // Unfiltered raw source used in metrics calculation if the source
4183     // has been filtered.
4184     if (is_psnr_calc_enabled(cpi)) {
4185 #ifdef ENABLE_KF_DENOISE
4186       if (is_spatial_denoise_enabled(cpi)) {
4187         cpi->raw_source_frame = vp9_scale_if_required(
4188             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
4189             (oxcf->pass == 0), EIGHTTAP, 0);
4190       } else {
4191         cpi->raw_source_frame = cpi->Source;
4192       }
4193 #else
4194       cpi->raw_source_frame = cpi->Source;
4195 #endif
4196     }
4197
4198     if (cpi->unscaled_last_source != NULL)
4199       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
4200                                                &cpi->scaled_last_source,
4201                                                (oxcf->pass == 0), EIGHTTAP, 0);
4202
4203     if (frame_is_intra_only(cm) == 0) {
4204       if (loop_count > 0) {
4205         release_scaled_references(cpi);
4206       }
4207       vp9_scale_references(cpi);
4208     }
4209
4210     vp9_set_quantizer(cm, q);
4211
4212     if (loop_count == 0) setup_frame(cpi);
4213
4214     // Variance adaptive and in frame q adjustment experiments are mutually
4215     // exclusive.
4216     if (oxcf->aq_mode == VARIANCE_AQ) {
4217       vp9_vaq_frame_setup(cpi);
4218     } else if (oxcf->aq_mode == EQUATOR360_AQ) {
4219       vp9_360aq_frame_setup(cpi);
4220     } else if (oxcf->aq_mode == COMPLEXITY_AQ) {
4221       vp9_setup_in_frame_q_adj(cpi);
4222     } else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
4223       vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4224     } else if (oxcf->aq_mode == PSNR_AQ) {
4225       vp9_psnr_aq_mode_setup(&cm->seg);
4226     }
4227
4228     vp9_encode_frame(cpi);
4229
4230     // Update the skip mb flag probabilities based on the distribution
4231     // seen in the last encoder iteration.
4232     // update_base_skip_probs(cpi);
4233
4234     vpx_clear_system_state();
4235
4236     // Dummy pack of the bitstream using up to date stats to get an
4237     // accurate estimate of output frame size to determine if we need
4238     // to recode.
4239     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4240       save_coding_context(cpi);
4241       if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
4242
4243       rc->projected_frame_size = (int)(*size) << 3;
4244
4245       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
4246     }
4247
4248     if (oxcf->rc_mode == VPX_Q) {
4249       loop = 0;
4250     } else {
4251       if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
4252           (rc->projected_frame_size < rc->max_frame_bandwidth)) {
4253         int last_q = q;
4254         int64_t kf_err;
4255
4256         int64_t high_err_target = cpi->ambient_err;
4257         int64_t low_err_target = cpi->ambient_err >> 1;
4258
4259 #if CONFIG_VP9_HIGHBITDEPTH
4260         if (cm->use_highbitdepth) {
4261           kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4262         } else {
4263           kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4264         }
4265 #else
4266         kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4267 #endif  // CONFIG_VP9_HIGHBITDEPTH
4268
4269         // Prevent possible divide by zero error below for perfect KF
4270         kf_err += !kf_err;
4271
4272         // The key frame is not good enough or we can afford
4273         // to make it better without undue risk of popping.
4274         if ((kf_err > high_err_target &&
4275              rc->projected_frame_size <= frame_over_shoot_limit) ||
4276             (kf_err > low_err_target &&
4277              rc->projected_frame_size <= frame_under_shoot_limit)) {
4278           // Lower q_high
4279           q_high = q > q_low ? q - 1 : q_low;
4280
4281           // Adjust Q
4282           q = (int)((q * high_err_target) / kf_err);
4283           q = VPXMIN(q, (q_high + q_low) >> 1);
4284         } else if (kf_err < low_err_target &&
4285                    rc->projected_frame_size >= frame_under_shoot_limit) {
4286           // The key frame is much better than the previous frame
4287           // Raise q_low
4288           q_low = q < q_high ? q + 1 : q_high;
4289
4290           // Adjust Q
4291           q = (int)((q * low_err_target) / kf_err);
4292           q = VPXMIN(q, (q_high + q_low + 1) >> 1);
4293         }
4294
4295         // Clamp Q to upper and lower limits:
4296         q = clamp(q, q_low, q_high);
4297
4298         loop = q != last_q;
4299       } else if (recode_loop_test(cpi, frame_over_shoot_limit,
4300                                   frame_under_shoot_limit, q,
4301                                   VPXMAX(q_high, top_index), bottom_index)) {
4302         // Is the projected frame size out of range and are we allowed
4303         // to attempt to recode.
4304         int last_q = q;
4305         int retries = 0;
4306         int qstep;
4307
4308         if (cpi->resize_pending == 1) {
4309           // Change in frame size so go back around the recode loop.
4310           cpi->rc.frame_size_selector =
4311               SCALE_STEP1 - cpi->rc.frame_size_selector;
4312           cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
4313
4314 #if CONFIG_INTERNAL_STATS
4315           ++cpi->tot_recode_hits;
4316 #endif
4317           ++loop_count;
4318           loop = 1;
4319           continue;
4320         }
4321
4322         // Frame size out of permitted range:
4323         // Update correction factor & compute new Q to try...
4324
4325         // Frame is too large
4326         if (rc->projected_frame_size > rc->this_frame_target) {
4327           // Special case if the projected size is > the max allowed.
4328           if ((q == q_high) &&
4329               ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
4330                (!rc->is_src_frame_alt_ref &&
4331                 (rc->projected_frame_size >=
4332                  big_rate_miss_high_threshold(cpi))))) {
4333             int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
4334                                             big_rate_miss_high_threshold(cpi)));
4335             double q_val_high;
4336             q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
4337             q_val_high =
4338                 q_val_high * ((double)rc->projected_frame_size / max_rate);
4339             q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
4340             q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
4341           }
4342
4343           // Raise Qlow as to at least the current value
4344           qstep =
4345               get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
4346           q_low = VPXMIN(q + qstep, q_high);
4347
4348           if (undershoot_seen || loop_at_this_size > 1) {
4349             // Update rate_correction_factor unless
4350             vp9_rc_update_rate_correction_factors(cpi);
4351
4352             q = (q_high + q_low + 1) / 2;
4353           } else {
4354             // Update rate_correction_factor unless
4355             vp9_rc_update_rate_correction_factors(cpi);
4356
4357             q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4358                                   VPXMAX(q_high, top_index));
4359
4360             while (q < q_low && retries < 10) {
4361               vp9_rc_update_rate_correction_factors(cpi);
4362               q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4363                                     VPXMAX(q_high, top_index));
4364               retries++;
4365             }
4366           }
4367
4368           overshoot_seen = 1;
4369         } else {
4370           // Frame is too small
4371           qstep =
4372               get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
4373           q_high = VPXMAX(q - qstep, q_low);
4374
4375           if (overshoot_seen || loop_at_this_size > 1) {
4376             vp9_rc_update_rate_correction_factors(cpi);
4377             q = (q_high + q_low) / 2;
4378           } else {
4379             vp9_rc_update_rate_correction_factors(cpi);
4380             q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4381                                   VPXMIN(q_low, bottom_index), top_index);
4382             // Special case reset for qlow for constrained quality.
4383             // This should only trigger where there is very substantial
4384             // undershoot on a frame and the auto cq level is above
4385             // the user passsed in value.
4386             if (oxcf->rc_mode == VPX_CQ && q < q_low) {
4387               q_low = q;
4388             }
4389
4390             while (q > q_high && retries < 10) {
4391               vp9_rc_update_rate_correction_factors(cpi);
4392               q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4393                                     VPXMIN(q_low, bottom_index), top_index);
4394               retries++;
4395             }
4396           }
4397           undershoot_seen = 1;
4398         }
4399
4400         // Clamp Q to upper and lower limits:
4401         q = clamp(q, q_low, q_high);
4402
4403         loop = (q != last_q);
4404       } else {
4405         loop = 0;
4406       }
4407     }
4408
4409     // Special case for overlay frame.
4410     if (rc->is_src_frame_alt_ref &&
4411         rc->projected_frame_size < rc->max_frame_bandwidth)
4412       loop = 0;
4413
4414     if (loop) {
4415       ++loop_count;
4416       ++loop_at_this_size;
4417
4418 #if CONFIG_INTERNAL_STATS
4419       ++cpi->tot_recode_hits;
4420 #endif
4421     }
4422
4423     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
4424       if (loop || !enable_acl) restore_coding_context(cpi);
4425   } while (loop);
4426
4427 #ifdef AGGRESSIVE_VBR
4428   if (two_pass_first_group_inter(cpi)) {
4429     cpi->twopass.active_worst_quality =
4430         VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
4431   } else if (!frame_is_kf_gf_arf(cpi)) {
4432 #else
4433   if (!frame_is_kf_gf_arf(cpi)) {
4434 #endif
4435     // Have we been forced to adapt Q outside the expected range by an extreme
4436     // rate miss. If so adjust the active maxQ for the subsequent frames.
4437     if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
4438       cpi->twopass.active_worst_quality = q;
4439     } else if (oxcf->vbr_corpus_complexity && q == q_low &&
4440                rc->projected_frame_size < rc->this_frame_target) {
4441       cpi->twopass.active_worst_quality =
4442           VPXMAX(q, cpi->twopass.active_worst_quality - 1);
4443     }
4444   }
4445
4446   if (enable_acl) {
4447     // Skip recoding, if model diff is below threshold
4448     const int thresh = compute_context_model_thresh(cpi);
4449     const int diff = compute_context_model_diff(cm);
4450     if (diff < thresh) {
4451       vpx_clear_system_state();
4452       restore_coding_context(cpi);
4453       return;
4454     }
4455
4456     vp9_encode_frame(cpi);
4457     vpx_clear_system_state();
4458     restore_coding_context(cpi);
4459   }
4460 }
4461 #endif  // !CONFIG_REALTIME_ONLY
4462
4463 static int get_ref_frame_flags(const VP9_COMP *cpi) {
4464   const int *const map = cpi->common.ref_frame_map;
4465   const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
4466   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
4467   const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
4468   int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
4469
4470   if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
4471
4472   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
4473       (cpi->svc.number_temporal_layers == 1 &&
4474        cpi->svc.number_spatial_layers == 1))
4475     flags &= ~VP9_GOLD_FLAG;
4476
4477   if (alt_is_last) flags &= ~VP9_ALT_FLAG;
4478
4479   if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
4480
4481   return flags;
4482 }
4483
4484 static void set_ext_overrides(VP9_COMP *cpi) {
4485   // Overrides the defaults with the externally supplied values with
4486   // vp9_update_reference() and vp9_update_entropy() calls
4487   // Note: The overrides are valid only for the next frame passed
4488   // to encode_frame_to_data_rate() function
4489   if (cpi->ext_refresh_frame_context_pending) {
4490     cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
4491     cpi->ext_refresh_frame_context_pending = 0;
4492   }
4493   if (cpi->ext_refresh_frame_flags_pending) {
4494     cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
4495     cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
4496     cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
4497   }
4498 }
4499
4500 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
4501     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4502     YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
4503     int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) {
4504   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4505       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4506 #if CONFIG_VP9_HIGHBITDEPTH
4507     if (cm->bit_depth == VPX_BITS_8) {
4508       vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4509                                  phase_scaler2);
4510       vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type,
4511                                  phase_scaler);
4512     } else {
4513       scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
4514                              filter_type2, phase_scaler2);
4515       scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
4516                              filter_type, phase_scaler);
4517     }
4518 #else
4519     vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4520                                phase_scaler2);
4521     vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler);
4522 #endif  // CONFIG_VP9_HIGHBITDEPTH
4523     return scaled;
4524   } else {
4525     return unscaled;
4526   }
4527 }
4528
4529 YV12_BUFFER_CONFIG *vp9_scale_if_required(
4530     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4531     int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) {
4532   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4533       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4534 #if CONFIG_VP9_HIGHBITDEPTH
4535     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4536         unscaled->y_height <= (scaled->y_height << 1))
4537       if (cm->bit_depth == VPX_BITS_8)
4538         vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4539       else
4540         scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
4541                                filter_type, phase_scaler);
4542     else
4543       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
4544 #else
4545     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4546         unscaled->y_height <= (scaled->y_height << 1))
4547       vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4548     else
4549       scale_and_extend_frame_nonnormative(unscaled, scaled);
4550 #endif  // CONFIG_VP9_HIGHBITDEPTH
4551     return scaled;
4552   } else {
4553     return unscaled;
4554   }
4555 }
4556
4557 static void set_ref_sign_bias(VP9_COMP *cpi) {
4558   VP9_COMMON *const cm = &cpi->common;
4559   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4560   const int cur_frame_index = ref_buffer->frame_index;
4561   MV_REFERENCE_FRAME ref_frame;
4562
4563   for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
4564     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
4565     const RefCntBuffer *const ref_cnt_buf =
4566         get_ref_cnt_buffer(&cpi->common, buf_idx);
4567     if (ref_cnt_buf) {
4568       cm->ref_frame_sign_bias[ref_frame] =
4569           cur_frame_index < ref_cnt_buf->frame_index;
4570     }
4571   }
4572 }
4573
4574 static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
4575   INTERP_FILTER ifilter;
4576   int ref_total[MAX_REF_FRAMES] = { 0 };
4577   MV_REFERENCE_FRAME ref;
4578   int mask = 0;
4579   if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
4580     return mask;
4581   for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
4582     for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
4583       ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
4584
4585   for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
4586     if ((ref_total[LAST_FRAME] &&
4587          cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
4588         (ref_total[GOLDEN_FRAME] == 0 ||
4589          cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
4590              ref_total[GOLDEN_FRAME]) &&
4591         (ref_total[ALTREF_FRAME] == 0 ||
4592          cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
4593              ref_total[ALTREF_FRAME]))
4594       mask |= 1 << ifilter;
4595   }
4596   return mask;
4597 }
4598
4599 #ifdef ENABLE_KF_DENOISE
4600 // Baseline Kernal weights for denoise
4601 static uint8_t dn_kernal_3[9] = { 1, 2, 1, 2, 4, 2, 1, 2, 1 };
4602 static uint8_t dn_kernal_5[25] = { 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 4,
4603                                    2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1 };
4604
4605 static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
4606                                      uint8_t point_weight, int *sum_val,
4607                                      int *sum_weight) {
4608   if (abs(centre_val - data_val) <= thresh) {
4609     *sum_weight += point_weight;
4610     *sum_val += (int)data_val * (int)point_weight;
4611   }
4612 }
4613
4614 static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
4615                                   const int strength) {
4616   int sum_weight = 0;
4617   int sum_val = 0;
4618   int thresh = strength;
4619   int kernal_size = 5;
4620   int half_k_size = 2;
4621   int i, j;
4622   int max_diff = 0;
4623   uint8_t *tmp_ptr;
4624   uint8_t *kernal_ptr;
4625
4626   // Find the maximum deviation from the source point in the locale.
4627   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4628   for (i = 0; i < kernal_size + 2; ++i) {
4629     for (j = 0; j < kernal_size + 2; ++j) {
4630       max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
4631     }
4632     tmp_ptr += stride;
4633   }
4634
4635   // Select the kernal size.
4636   if (max_diff > (strength + (strength >> 1))) {
4637     kernal_size = 3;
4638     half_k_size = 1;
4639     thresh = thresh >> 1;
4640   }
4641   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4642
4643   // Apply the kernal
4644   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4645   for (i = 0; i < kernal_size; ++i) {
4646     for (j = 0; j < kernal_size; ++j) {
4647       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4648                         &sum_val, &sum_weight);
4649       ++kernal_ptr;
4650     }
4651     tmp_ptr += stride;
4652   }
4653
4654   // Update the source value with the new filtered value
4655   *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4656 }
4657
4658 #if CONFIG_VP9_HIGHBITDEPTH
4659 static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
4660                                          const int strength) {
4661   int sum_weight = 0;
4662   int sum_val = 0;
4663   int thresh = strength;
4664   int kernal_size = 5;
4665   int half_k_size = 2;
4666   int i, j;
4667   int max_diff = 0;
4668   uint16_t *tmp_ptr;
4669   uint8_t *kernal_ptr;
4670
4671   // Find the maximum deviation from the source point in the locale.
4672   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4673   for (i = 0; i < kernal_size + 2; ++i) {
4674     for (j = 0; j < kernal_size + 2; ++j) {
4675       max_diff = VPXMAX(max_diff, abs((int)src_ptr - (int)tmp_ptr[j]));
4676     }
4677     tmp_ptr += stride;
4678   }
4679
4680   // Select the kernal size.
4681   if (max_diff > (strength + (strength >> 1))) {
4682     kernal_size = 3;
4683     half_k_size = 1;
4684     thresh = thresh >> 1;
4685   }
4686   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4687
4688   // Apply the kernal
4689   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4690   for (i = 0; i < kernal_size; ++i) {
4691     for (j = 0; j < kernal_size; ++j) {
4692       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4693                         &sum_val, &sum_weight);
4694       ++kernal_ptr;
4695     }
4696     tmp_ptr += stride;
4697   }
4698
4699   // Update the source value with the new filtered value
4700   *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4701 }
4702 #endif  // CONFIG_VP9_HIGHBITDEPTH
4703
4704 // Apply thresholded spatial noise supression to a given buffer.
4705 static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
4706                                    const int stride, const int width,
4707                                    const int height, const int strength) {
4708   VP9_COMMON *const cm = &cpi->common;
4709   uint8_t *src_ptr = buffer;
4710   int row;
4711   int col;
4712
4713   for (row = 0; row < height; ++row) {
4714     for (col = 0; col < width; ++col) {
4715 #if CONFIG_VP9_HIGHBITDEPTH
4716       if (cm->use_highbitdepth)
4717         highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]), stride,
4718                                      strength);
4719       else
4720         spatial_denoise_point(&src_ptr[col], stride, strength);
4721 #else
4722       spatial_denoise_point(&src_ptr[col], stride, strength);
4723 #endif  // CONFIG_VP9_HIGHBITDEPTH
4724     }
4725     src_ptr += stride;
4726   }
4727 }
4728
4729 // Apply thresholded spatial noise supression to source.
4730 static void spatial_denoise_frame(VP9_COMP *cpi) {
4731   YV12_BUFFER_CONFIG *src = cpi->Source;
4732   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4733   TWO_PASS *const twopass = &cpi->twopass;
4734   VP9_COMMON *const cm = &cpi->common;
4735
4736   // Base the filter strength on the current active max Q.
4737   const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
4738                                               cm->bit_depth));
4739   int strength =
4740       VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));
4741
4742   // Denoise each of Y,U and V buffers.
4743   spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
4744                          src->y_height, strength);
4745
4746   strength += (strength >> 1);
4747   spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
4748                          src->uv_height, strength << 1);
4749
4750   spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
4751                          src->uv_height, strength << 1);
4752 }
4753 #endif  // ENABLE_KF_DENOISE
4754
4755 #if !CONFIG_REALTIME_ONLY
4756 static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
4757                                          uint8_t *dest) {
4758   if (cpi->common.seg.enabled)
4759     if (ALT_REF_AQ_PROTECT_GAIN) {
4760       size_t nsize = *size;
4761       int overhead;
4762
4763       // TODO(yuryg): optimize this, as
4764       // we don't really need to repack
4765
4766       save_coding_context(cpi);
4767       vp9_disable_segmentation(&cpi->common.seg);
4768       vp9_pack_bitstream(cpi, dest, &nsize);
4769       restore_coding_context(cpi);
4770
4771       overhead = (int)*size - (int)nsize;
4772
4773       if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
4774         vp9_encode_frame(cpi);
4775       else
4776         vp9_enable_segmentation(&cpi->common.seg);
4777     }
4778 }
4779 #endif
4780
4781 static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
4782   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4783
4784   if (ref_buffer) {
4785     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
4786     ref_buffer->frame_index =
4787         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
4788   }
4789 }
4790
4791 // Implementation and modifications of C. Yeo, H. L. Tan, and Y. H. Tan, "On
4792 // rate distortion optimization using SSIM," Circuits and Systems for Video
4793 // Technology, IEEE Transactions on, vol. 23, no. 7, pp. 1170-1181, 2013.
4794 // SSIM_VAR_SCALE defines the strength of the bias towards SSIM in RDO.
4795 // Some sample values are:
4796 // (for midres test set)
4797 // SSIM_VAR_SCALE  avg_psnr   ssim   ms_ssim
4798 //      8.0          9.421   -5.537  -6.898
4799 //     16.0          4.703   -5.378  -6.238
4800 //     32.0          1.929   -4.308  -4.807
4801 #define SSIM_VAR_SCALE 16.0
4802 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
4803   VP9_COMMON *cm = &cpi->common;
4804   ThreadData *td = &cpi->td;
4805   MACROBLOCK *x = &td->mb;
4806   MACROBLOCKD *xd = &x->e_mbd;
4807   uint8_t *y_buffer = cpi->Source->y_buffer;
4808   const int y_stride = cpi->Source->y_stride;
4809   const int block_size = BLOCK_16X16;
4810
4811   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
4812   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
4813   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
4814   const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
4815   double log_sum = 0.0;
4816   int row, col;
4817
4818   const double c2 = 58.5225 * SSIM_VAR_SCALE;  // 58.5225 = (.03*255)^2
4819
4820   // Loop through each 64x64 block.
4821   for (row = 0; row < num_rows; ++row) {
4822     for (col = 0; col < num_cols; ++col) {
4823       int mi_row, mi_col;
4824       double var = 0.0, num_of_var = 0.0;
4825       const int index = row * num_cols + col;
4826
4827       for (mi_row = row * num_8x8_h;
4828            mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
4829         for (mi_col = col * num_8x8_w;
4830              mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w; ++mi_col) {
4831           struct buf_2d buf;
4832           const int row_offset_y = mi_row << 3;
4833           const int col_offset_y = mi_col << 3;
4834
4835           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
4836           buf.stride = y_stride;
4837
4838           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
4839           // and high bit videos, the variance needs to be divided by 2.0 or
4840           // 64.0 separately.
4841           // TODO(sdeng): need to tune for 12bit videos.
4842 #if CONFIG_VP9_HIGHBITDEPTH
4843           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
4844             var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
4845           else
4846 #endif
4847             var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
4848
4849           num_of_var += 1.0;
4850         }
4851       }
4852       var = var / num_of_var / 64.0;
4853       var = 2.0 * var + c2;
4854       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
4855       log_sum += log(var);
4856     }
4857   }
4858   log_sum = exp(log_sum / (double)(num_rows * num_cols));
4859
4860   for (row = 0; row < num_rows; ++row) {
4861     for (col = 0; col < num_cols; ++col) {
4862       const int index = row * num_cols + col;
4863       cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
4864     }
4865   }
4866
4867   (void)xd;
4868 }
4869
4870 // Process the wiener variance in 16x16 block basis.
4871 static int qsort_comp(const void *elem1, const void *elem2) {
4872   int a = *((const int *)elem1);
4873   int b = *((const int *)elem2);
4874   if (a > b) return 1;
4875   if (a < b) return -1;
4876   return 0;
4877 }
4878
4879 static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
4880   VP9_COMMON *cm = &cpi->common;
4881
4882   if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows &&
4883       cpi->mb_wiener_var_cols >= cm->mb_cols)
4884     return;
4885
4886   vpx_free(cpi->mb_wiener_variance);
4887   cpi->mb_wiener_variance = NULL;
4888
4889   CHECK_MEM_ERROR(
4890       cm, cpi->mb_wiener_variance,
4891       vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
4892   cpi->mb_wiener_var_rows = cm->mb_rows;
4893   cpi->mb_wiener_var_cols = cm->mb_cols;
4894 }
4895
4896 static void set_mb_wiener_variance(VP9_COMP *cpi) {
4897   VP9_COMMON *cm = &cpi->common;
4898   uint8_t *buffer = cpi->Source->y_buffer;
4899   int buf_stride = cpi->Source->y_stride;
4900
4901 #if CONFIG_VP9_HIGHBITDEPTH
4902   ThreadData *td = &cpi->td;
4903   MACROBLOCK *x = &td->mb;
4904   MACROBLOCKD *xd = &x->e_mbd;
4905   DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
4906   DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
4907   uint8_t *zero_pred;
4908 #else
4909   DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
4910 #endif
4911
4912   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
4913   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
4914
4915   int mb_row, mb_col, count = 0;
4916   // Hard coded operating block size
4917   const int block_size = 16;
4918   const int coeff_count = block_size * block_size;
4919   const TX_SIZE tx_size = TX_16X16;
4920
4921 #if CONFIG_VP9_HIGHBITDEPTH
4922   xd->cur_buf = cpi->Source;
4923   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4924     zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
4925     memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);
4926   } else {
4927     zero_pred = zero_pred8;
4928     memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);
4929   }
4930 #else
4931   memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
4932 #endif
4933
4934   cpi->norm_wiener_variance = 0;
4935
4936   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
4937     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
4938       int idx;
4939       int16_t median_val = 0;
4940       uint8_t *mb_buffer =
4941           buffer + mb_row * block_size * buf_stride + mb_col * block_size;
4942       int64_t wiener_variance = 0;
4943
4944 #if CONFIG_VP9_HIGHBITDEPTH
4945       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4946         vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
4947                                   mb_buffer, buf_stride, zero_pred, block_size,
4948                                   xd->bd);
4949         highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
4950       } else {
4951         vpx_subtract_block(block_size, block_size, src_diff, block_size,
4952                            mb_buffer, buf_stride, zero_pred, block_size);
4953         wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
4954       }
4955 #else
4956       vpx_subtract_block(block_size, block_size, src_diff, block_size,
4957                          mb_buffer, buf_stride, zero_pred, block_size);
4958       wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
4959 #endif  // CONFIG_VP9_HIGHBITDEPTH
4960
4961       coeff[0] = 0;
4962       for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]);
4963
4964       qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp);
4965
4966       // Noise level estimation
4967       median_val = coeff[coeff_count / 2];
4968
4969       // Wiener filter
4970       for (idx = 1; idx < coeff_count; ++idx) {
4971         int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
4972         int64_t tmp_coeff = (int64_t)coeff[idx];
4973         if (median_val) {
4974           tmp_coeff = (sqr_coeff * coeff[idx]) /
4975                       (sqr_coeff + (int64_t)median_val * median_val);
4976         }
4977         wiener_variance += tmp_coeff * tmp_coeff;
4978       }
4979       cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
4980           wiener_variance / coeff_count;
4981       cpi->norm_wiener_variance +=
4982           cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col];
4983       ++count;
4984     }
4985   }
4986
4987   if (count) cpi->norm_wiener_variance /= count;
4988   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
4989 }
4990
4991 static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
4992                                       uint8_t *dest,
4993                                       unsigned int *frame_flags) {
4994   VP9_COMMON *const cm = &cpi->common;
4995   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4996   struct segmentation *const seg = &cm->seg;
4997   TX_SIZE t;
4998
4999   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
5000   // No need to set svc.skip_enhancement_layer if whole superframe will be
5001   // dropped.
5002   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
5003       cpi->oxcf.target_bandwidth == 0 &&
5004       !(cpi->svc.framedrop_mode != LAYER_DROP &&
5005         (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
5006          cpi->svc
5007              .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
5008                                                 1]) &&
5009         cpi->svc.drop_spatial_layer[0])) {
5010     cpi->svc.skip_enhancement_layer = 1;
5011     vp9_rc_postencode_update_drop_frame(cpi);
5012     cpi->ext_refresh_frame_flags_pending = 0;
5013     cpi->last_frame_dropped = 1;
5014     cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
5015     cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
5016     vp9_inc_frame_in_layer(cpi);
5017     return;
5018   }
5019
5020   set_ext_overrides(cpi);
5021   vpx_clear_system_state();
5022
5023 #ifdef ENABLE_KF_DENOISE
5024   // Spatial denoise of key frame.
5025   if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
5026 #endif
5027
5028   if (cm->show_existing_frame == 0) {
5029     // Update frame index
5030     set_frame_index(cpi, cm);
5031
5032     // Set the arf sign bias for this frame.
5033     set_ref_sign_bias(cpi);
5034   }
5035
5036   // Set default state for segment based loop filter update flags.
5037   cm->lf.mode_ref_delta_update = 0;
5038
5039   if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
5040     cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);
5041
5042   // Set various flags etc to special state if it is a key frame.
5043   if (frame_is_intra_only(cm)) {
5044     // Reset the loop filter deltas and segmentation map.
5045     vp9_reset_segment_features(&cm->seg);
5046
5047     // If segmentation is enabled force a map update for key frames.
5048     if (seg->enabled) {
5049       seg->update_map = 1;
5050       seg->update_data = 1;
5051     }
5052
5053     // The alternate reference frame cannot be active for a key frame.
5054     cpi->rc.source_alt_ref_active = 0;
5055
5056     cm->error_resilient_mode = oxcf->error_resilient_mode;
5057     cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
5058
5059     // By default, encoder assumes decoder can use prev_mi.
5060     if (cm->error_resilient_mode) {
5061       cm->frame_parallel_decoding_mode = 1;
5062       cm->reset_frame_context = 0;
5063       cm->refresh_frame_context = 0;
5064     } else if (cm->intra_only) {
5065       // Only reset the current context.
5066       cm->reset_frame_context = 2;
5067     }
5068   }
5069
5070   if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);
5071
5072   if (oxcf->aq_mode == PERCEPTUAL_AQ) {
5073     init_mb_wiener_var_buffer(cpi);
5074     set_mb_wiener_variance(cpi);
5075   }
5076
5077   vpx_clear_system_state();
5078
5079 #if CONFIG_INTERNAL_STATS
5080   memset(cpi->mode_chosen_counts, 0,
5081          MAX_MODES * sizeof(*cpi->mode_chosen_counts));
5082 #endif
5083 #if CONFIG_CONSISTENT_RECODE
5084   // Backup to ensure consistency between recodes
5085   save_encode_params(cpi);
5086 #endif
5087
5088   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
5089     if (!encode_without_recode_loop(cpi, size, dest)) return;
5090   } else {
5091 #if !CONFIG_REALTIME_ONLY
5092     encode_with_recode_loop(cpi, size, dest);
5093 #endif
5094   }
5095
5096   // TODO(jingning): When using show existing frame mode, we assume that the
5097   // current ARF will be directly used as the final reconstructed frame. This is
5098   // an encoder control scheme. One could in principle explore other
5099   // possibilities to arrange the reference frame buffer and their coding order.
5100   if (cm->show_existing_frame) {
5101     ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
5102                cm->ref_frame_map[cpi->alt_fb_idx]);
5103   }
5104
5105 #if !CONFIG_REALTIME_ONLY
5106   // Disable segmentation if it decrease rate/distortion ratio
5107   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5108     vp9_try_disable_lookahead_aq(cpi, size, dest);
5109 #endif
5110
5111 #if CONFIG_VP9_TEMPORAL_DENOISING
5112 #ifdef OUTPUT_YUV_DENOISED
5113   if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
5114     vpx_write_yuv_frame(yuv_denoised_file,
5115                         &cpi->denoiser.running_avg_y[INTRA_FRAME]);
5116   }
5117 #endif
5118 #endif
5119 #ifdef OUTPUT_YUV_SKINMAP
5120   if (cpi->common.current_video_frame > 1) {
5121     vp9_output_skin_map(cpi, yuv_skinmap_file);
5122   }
5123 #endif
5124
5125   // Special case code to reduce pulsing when key frames are forced at a
5126   // fixed interval. Note the reconstruction error if it is the frame before
5127   // the force key frame
5128   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
5129 #if CONFIG_VP9_HIGHBITDEPTH
5130     if (cm->use_highbitdepth) {
5131       cpi->ambient_err =
5132           vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5133     } else {
5134       cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5135     }
5136 #else
5137     cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
5138 #endif  // CONFIG_VP9_HIGHBITDEPTH
5139   }
5140
5141   // If the encoder forced a KEY_FRAME decision
5142   if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;
5143
5144   cm->frame_to_show = get_frame_new_buffer(cm);
5145   cm->frame_to_show->color_space = cm->color_space;
5146   cm->frame_to_show->color_range = cm->color_range;
5147   cm->frame_to_show->render_width = cm->render_width;
5148   cm->frame_to_show->render_height = cm->render_height;
5149
5150   // Pick the loop filter level for the frame.
5151   loopfilter_frame(cpi, cm);
5152
5153   if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
5154
5155   // build the bitstream
5156   vp9_pack_bitstream(cpi, dest, size);
5157
5158   if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
5159       cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
5160     restore_coding_context(cpi);
5161     return;
5162   }
5163
5164   cpi->last_frame_dropped = 0;
5165   cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
5166   if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
5167     cpi->svc.num_encoded_top_layer++;
5168
5169   // Keep track of the frame buffer index updated/refreshed for the
5170   // current encoded TL0 superframe.
5171   if (cpi->svc.temporal_layer_id == 0) {
5172     if (cpi->refresh_last_frame)
5173       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
5174     else if (cpi->refresh_golden_frame)
5175       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
5176     else if (cpi->refresh_alt_ref_frame)
5177       cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
5178   }
5179
5180   if (cm->seg.update_map) update_reference_segmentation_map(cpi);
5181
5182   if (frame_is_intra_only(cm) == 0) {
5183     release_scaled_references(cpi);
5184   }
5185   vp9_update_reference_frames(cpi);
5186
5187   if (!cm->show_existing_frame) {
5188     for (t = TX_4X4; t <= TX_32X32; ++t) {
5189       full_to_model_counts(cpi->td.counts->coef[t],
5190                            cpi->td.rd_counts.coef_counts[t]);
5191     }
5192
5193     if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
5194       if (!frame_is_intra_only(cm)) {
5195         vp9_adapt_mode_probs(cm);
5196         vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
5197       }
5198       vp9_adapt_coef_probs(cm);
5199     }
5200   }
5201
5202   cpi->ext_refresh_frame_flags_pending = 0;
5203
5204   if (cpi->refresh_golden_frame == 1)
5205     cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
5206   else
5207     cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
5208
5209   if (cpi->refresh_alt_ref_frame == 1)
5210     cpi->frame_flags |= FRAMEFLAGS_ALTREF;
5211   else
5212     cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
5213
5214   cpi->ref_frame_flags = get_ref_frame_flags(cpi);
5215
5216   cm->last_frame_type = cm->frame_type;
5217
5218   vp9_rc_postencode_update(cpi, *size);
5219
5220   *size = VPXMAX(1, *size);
5221
5222 #if 0
5223   output_frame_level_debug_stats(cpi);
5224 #endif
5225
5226   if (cm->frame_type == KEY_FRAME) {
5227     // Tell the caller that the frame was coded as a key frame
5228     *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
5229   } else {
5230     *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
5231   }
5232
5233   // Clear the one shot update flags for segmentation map and mode/ref loop
5234   // filter deltas.
5235   cm->seg.update_map = 0;
5236   cm->seg.update_data = 0;
5237   cm->lf.mode_ref_delta_update = 0;
5238
5239   // keep track of the last coded dimensions
5240   cm->last_width = cm->width;
5241   cm->last_height = cm->height;
5242
5243   // reset to normal state now that we are done.
5244   if (!cm->show_existing_frame) {
5245     cm->last_show_frame = cm->show_frame;
5246     cm->prev_frame = cm->cur_frame;
5247   }
5248
5249   if (cm->show_frame) {
5250     vp9_swap_mi_and_prev_mi(cm);
5251     // Don't increment frame counters if this was an altref buffer
5252     // update not a real frame
5253     ++cm->current_video_frame;
5254     if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
5255   }
5256
5257   if (cpi->use_svc) {
5258     cpi->svc
5259         .layer_context[cpi->svc.spatial_layer_id *
5260                            cpi->svc.number_temporal_layers +
5261                        cpi->svc.temporal_layer_id]
5262         .last_frame_type = cm->frame_type;
5263     // Reset layer_sync back to 0 for next frame.
5264     cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
5265   }
5266
5267   cpi->force_update_segmentation = 0;
5268
5269 #if !CONFIG_REALTIME_ONLY
5270   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
5271     vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
5272 #endif
5273
5274   cpi->svc.previous_frame_is_intra_only = cm->intra_only;
5275   cpi->svc.set_intra_only_frame = 0;
5276 }
5277
5278 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5279                       unsigned int *frame_flags) {
5280   vp9_rc_get_svc_params(cpi);
5281   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
5282 }
5283
5284 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5285                         unsigned int *frame_flags) {
5286   if (cpi->oxcf.rc_mode == VPX_CBR) {
5287     vp9_rc_get_one_pass_cbr_params(cpi);
5288   } else {
5289     vp9_rc_get_one_pass_vbr_params(cpi);
5290   }
5291   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
5292 }
5293
5294 #if !CONFIG_REALTIME_ONLY
5295 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5296                         unsigned int *frame_flags) {
5297   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
5298 #if CONFIG_MISMATCH_DEBUG
5299   mismatch_move_frame_idx_w();
5300 #endif
5301   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
5302
5303   vp9_twopass_postencode_update(cpi);
5304 }
5305 #endif  // !CONFIG_REALTIME_ONLY
5306
5307 static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
5308                                  int subsampling_x, int subsampling_y) {
5309   VP9_COMMON *const cm = &cpi->common;
5310 #if !CONFIG_VP9_HIGHBITDEPTH
5311   (void)use_highbitdepth;
5312   assert(use_highbitdepth == 0);
5313 #endif
5314
5315   if (!cpi->initial_width ||
5316 #if CONFIG_VP9_HIGHBITDEPTH
5317       cm->use_highbitdepth != use_highbitdepth ||
5318 #endif
5319       cm->subsampling_x != subsampling_x ||
5320       cm->subsampling_y != subsampling_y) {
5321     cm->subsampling_x = subsampling_x;
5322     cm->subsampling_y = subsampling_y;
5323 #if CONFIG_VP9_HIGHBITDEPTH
5324     cm->use_highbitdepth = use_highbitdepth;
5325 #endif
5326
5327     cpi->initial_width = cm->width;
5328     cpi->initial_height = cm->height;
5329     cpi->initial_mbs = cm->MBs;
5330   }
5331 }
5332
5333 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
5334                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
5335                           int64_t end_time) {
5336   VP9_COMMON *const cm = &cpi->common;
5337   struct vpx_usec_timer timer;
5338   int res = 0;
5339   const int subsampling_x = sd->subsampling_x;
5340   const int subsampling_y = sd->subsampling_y;
5341 #if CONFIG_VP9_HIGHBITDEPTH
5342   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
5343 #else
5344   const int use_highbitdepth = 0;
5345 #endif
5346
5347   update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
5348 #if CONFIG_VP9_TEMPORAL_DENOISING
5349   setup_denoiser_buffer(cpi);
5350 #endif
5351
5352   alloc_raw_frame_buffers(cpi);
5353
5354   vpx_usec_timer_start(&timer);
5355
5356   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
5357                          use_highbitdepth, frame_flags))
5358     res = -1;
5359   vpx_usec_timer_mark(&timer);
5360   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
5361
5362   if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
5363       (subsampling_x != 1 || subsampling_y != 1)) {
5364     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5365                        "Non-4:2:0 color format requires profile 1 or 3");
5366     res = -1;
5367   }
5368   if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
5369       (subsampling_x == 1 && subsampling_y == 1)) {
5370     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5371                        "4:2:0 color format requires profile 0 or 2");
5372     res = -1;
5373   }
5374
5375   return res;
5376 }
5377
5378 static int frame_is_reference(const VP9_COMP *cpi) {
5379   const VP9_COMMON *cm = &cpi->common;
5380
5381   return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
5382          cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame ||
5383          cm->refresh_frame_context || cm->lf.mode_ref_delta_update ||
5384          cm->seg.update_map || cm->seg.update_data;
5385 }
5386
5387 static void adjust_frame_rate(VP9_COMP *cpi,
5388                               const struct lookahead_entry *source) {
5389   int64_t this_duration;
5390   int step = 0;
5391
5392   if (source->ts_start == cpi->first_time_stamp_ever) {
5393     this_duration = source->ts_end - source->ts_start;
5394     step = 1;
5395   } else {
5396     int64_t last_duration =
5397         cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
5398
5399     this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
5400
5401     // do a step update if the duration changes by 10%
5402     if (last_duration)
5403       step = (int)((this_duration - last_duration) * 10 / last_duration);
5404   }
5405
5406   if (this_duration) {
5407     if (step) {
5408       vp9_new_framerate(cpi, 10000000.0 / this_duration);
5409     } else {
5410       // Average this frame's rate into the last second's average
5411       // frame rate. If we haven't seen 1 second yet, then average
5412       // over the whole interval seen.
5413       const double interval = VPXMIN(
5414           (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
5415       double avg_duration = 10000000.0 / cpi->framerate;
5416       avg_duration *= (interval - avg_duration + this_duration);
5417       avg_duration /= interval;
5418
5419       vp9_new_framerate(cpi, 10000000.0 / avg_duration);
5420     }
5421   }
5422   cpi->last_time_stamp_seen = source->ts_start;
5423   cpi->last_end_time_stamp_seen = source->ts_end;
5424 }
5425
5426 // Returns 0 if this is not an alt ref else the offset of the source frame
5427 // used as the arf midpoint.
5428 static int get_arf_src_index(VP9_COMP *cpi) {
5429   RATE_CONTROL *const rc = &cpi->rc;
5430   int arf_src_index = 0;
5431   if (is_altref_enabled(cpi)) {
5432     if (cpi->oxcf.pass == 2) {
5433       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5434       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
5435         arf_src_index = gf_group->arf_src_offset[gf_group->index];
5436       }
5437     } else if (rc->source_alt_ref_pending) {
5438       arf_src_index = rc->frames_till_gf_update_due;
5439     }
5440   }
5441   return arf_src_index;
5442 }
5443
5444 static void check_src_altref(VP9_COMP *cpi,
5445                              const struct lookahead_entry *source) {
5446   RATE_CONTROL *const rc = &cpi->rc;
5447
5448   if (cpi->oxcf.pass == 2) {
5449     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5450     rc->is_src_frame_alt_ref =
5451         (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
5452   } else {
5453     rc->is_src_frame_alt_ref =
5454         cpi->alt_ref_source && (source == cpi->alt_ref_source);
5455   }
5456
5457   if (rc->is_src_frame_alt_ref) {
5458     // Current frame is an ARF overlay frame.
5459     cpi->alt_ref_source = NULL;
5460
5461     // Don't refresh the last buffer for an ARF overlay frame. It will
5462     // become the GF so preserve last as an alternative prediction option.
5463     cpi->refresh_last_frame = 0;
5464   }
5465 }
5466
5467 #if CONFIG_INTERNAL_STATS
5468 static void adjust_image_stat(double y, double u, double v, double all,
5469                               ImageStat *s) {
5470   s->stat[Y] += y;
5471   s->stat[U] += u;
5472   s->stat[V] += v;
5473   s->stat[ALL] += all;
5474   s->worst = VPXMIN(s->worst, all);
5475 }
5476 #endif  // CONFIG_INTERNAL_STATS
5477
5478 // Adjust the maximum allowable frame size for the target level.
5479 static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) {
5480   RATE_CONTROL *const rc = &cpi->rc;
5481   LevelConstraint *const ls = &cpi->level_constraint;
5482   VP9_COMMON *const cm = &cpi->common;
5483   const double max_cpb_size = ls->max_cpb_size;
5484   vpx_clear_system_state();
5485   rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size);
5486   if (frame_is_intra_only(cm)) {
5487     rc->max_frame_bandwidth =
5488         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5));
5489   } else if (arf_src_index > 0) {
5490     rc->max_frame_bandwidth =
5491         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4));
5492   } else {
5493     rc->max_frame_bandwidth =
5494         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2));
5495   }
5496 }
5497
5498 static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
5499   VP9_COMMON *const cm = &cpi->common;
5500   Vp9LevelInfo *const level_info = &cpi->level_info;
5501   Vp9LevelSpec *const level_spec = &level_info->level_spec;
5502   Vp9LevelStats *const level_stats = &level_info->level_stats;
5503   int i, idx;
5504   uint64_t luma_samples, dur_end;
5505   const uint32_t luma_pic_size = cm->width * cm->height;
5506   const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
5507   LevelConstraint *const level_constraint = &cpi->level_constraint;
5508   const int8_t level_index = level_constraint->level_index;
5509   double cpb_data_size;
5510
5511   vpx_clear_system_state();
5512
5513   // update level_stats
5514   level_stats->total_compressed_size += *size;
5515   if (cm->show_frame) {
5516     level_stats->total_uncompressed_size +=
5517         luma_pic_size +
5518         2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
5519     level_stats->time_encoded =
5520         (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
5521         (double)TICKS_PER_SEC;
5522   }
5523
5524   if (arf_src_index > 0) {
5525     if (!level_stats->seen_first_altref) {
5526       level_stats->seen_first_altref = 1;
5527     } else if (level_stats->frames_since_last_altref <
5528                level_spec->min_altref_distance) {
5529       level_spec->min_altref_distance = level_stats->frames_since_last_altref;
5530     }
5531     level_stats->frames_since_last_altref = 0;
5532   } else {
5533     ++level_stats->frames_since_last_altref;
5534   }
5535
5536   if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
5537     idx = (level_stats->frame_window_buffer.start +
5538            level_stats->frame_window_buffer.len++) %
5539           FRAME_WINDOW_SIZE;
5540   } else {
5541     idx = level_stats->frame_window_buffer.start;
5542     level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
5543   }
5544   level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
5545   level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
5546   level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
5547
5548   if (cm->frame_type == KEY_FRAME) {
5549     level_stats->ref_refresh_map = 0;
5550   } else {
5551     int count = 0;
5552     level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
5553     // Also need to consider the case where the encoder refers to a buffer
5554     // that has been implicitly refreshed after encoding a keyframe.
5555     if (!cm->intra_only) {
5556       level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
5557       level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
5558       level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
5559     }
5560     for (i = 0; i < REF_FRAMES; ++i) {
5561       count += (level_stats->ref_refresh_map >> i) & 1;
5562     }
5563     if (count > level_spec->max_ref_frame_buffers) {
5564       level_spec->max_ref_frame_buffers = count;
5565     }
5566   }
5567
5568   // update average_bitrate
5569   level_spec->average_bitrate = (double)level_stats->total_compressed_size /
5570                                 125.0 / level_stats->time_encoded;
5571
5572   // update max_luma_sample_rate
5573   luma_samples = 0;
5574   for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
5575     idx = (level_stats->frame_window_buffer.start +
5576            level_stats->frame_window_buffer.len - 1 - i) %
5577           FRAME_WINDOW_SIZE;
5578     if (i == 0) {
5579       dur_end = level_stats->frame_window_buffer.buf[idx].ts;
5580     }
5581     if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
5582         TICKS_PER_SEC) {
5583       break;
5584     }
5585     luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
5586   }
5587   if (luma_samples > level_spec->max_luma_sample_rate) {
5588     level_spec->max_luma_sample_rate = luma_samples;
5589   }
5590
5591   // update max_cpb_size
5592   cpb_data_size = 0;
5593   for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
5594     if (i >= level_stats->frame_window_buffer.len) break;
5595     idx = (level_stats->frame_window_buffer.start +
5596            level_stats->frame_window_buffer.len - 1 - i) %
5597           FRAME_WINDOW_SIZE;
5598     cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5599   }
5600   cpb_data_size = cpb_data_size / 125.0;
5601   if (cpb_data_size > level_spec->max_cpb_size) {
5602     level_spec->max_cpb_size = cpb_data_size;
5603   }
5604
5605   // update max_luma_picture_size
5606   if (luma_pic_size > level_spec->max_luma_picture_size) {
5607     level_spec->max_luma_picture_size = luma_pic_size;
5608   }
5609
5610   // update max_luma_picture_breadth
5611   if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
5612     level_spec->max_luma_picture_breadth = luma_pic_breadth;
5613   }
5614
5615   // update compression_ratio
5616   level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
5617                                   cm->bit_depth /
5618                                   level_stats->total_compressed_size / 8.0;
5619
5620   // update max_col_tiles
5621   if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
5622     level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
5623   }
5624
5625   if (level_index >= 0 && level_constraint->fail_flag == 0) {
5626     if (level_spec->max_luma_picture_size >
5627         vp9_level_defs[level_index].max_luma_picture_size) {
5628       level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
5629       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5630                          "Failed to encode to the target level %d. %s",
5631                          vp9_level_defs[level_index].level,
5632                          level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
5633     }
5634
5635     if (level_spec->max_luma_picture_breadth >
5636         vp9_level_defs[level_index].max_luma_picture_breadth) {
5637       level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
5638       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5639                          "Failed to encode to the target level %d. %s",
5640                          vp9_level_defs[level_index].level,
5641                          level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
5642     }
5643
5644     if ((double)level_spec->max_luma_sample_rate >
5645         (double)vp9_level_defs[level_index].max_luma_sample_rate *
5646             (1 + SAMPLE_RATE_GRACE_P)) {
5647       level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
5648       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5649                          "Failed to encode to the target level %d. %s",
5650                          vp9_level_defs[level_index].level,
5651                          level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
5652     }
5653
5654     if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
5655       level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
5656       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5657                          "Failed to encode to the target level %d. %s",
5658                          vp9_level_defs[level_index].level,
5659                          level_fail_messages[TOO_MANY_COLUMN_TILE]);
5660     }
5661
5662     if (level_spec->min_altref_distance <
5663         vp9_level_defs[level_index].min_altref_distance) {
5664       level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
5665       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5666                          "Failed to encode to the target level %d. %s",
5667                          vp9_level_defs[level_index].level,
5668                          level_fail_messages[ALTREF_DIST_TOO_SMALL]);
5669     }
5670
5671     if (level_spec->max_ref_frame_buffers >
5672         vp9_level_defs[level_index].max_ref_frame_buffers) {
5673       level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
5674       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5675                          "Failed to encode to the target level %d. %s",
5676                          vp9_level_defs[level_index].level,
5677                          level_fail_messages[TOO_MANY_REF_BUFFER]);
5678     }
5679
5680     if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
5681       level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
5682       vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
5683                          "Failed to encode to the target level %d. %s",
5684                          vp9_level_defs[level_index].level,
5685                          level_fail_messages[CPB_TOO_LARGE]);
5686     }
5687
5688     // Set an upper bound for the next frame size. It will be used in
5689     // level_rc_framerate() before encoding the next frame.
5690     cpb_data_size = 0;
5691     for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
5692       if (i >= level_stats->frame_window_buffer.len) break;
5693       idx = (level_stats->frame_window_buffer.start +
5694              level_stats->frame_window_buffer.len - 1 - i) %
5695             FRAME_WINDOW_SIZE;
5696       cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
5697     }
5698     cpb_data_size = cpb_data_size / 125.0;
5699     level_constraint->max_frame_size =
5700         (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
5701               1000.0);
5702     if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
5703       level_constraint->max_frame_size >>= 1;
5704   }
5705 }
5706
5707 typedef struct GF_PICTURE {
5708   YV12_BUFFER_CONFIG *frame;
5709   int ref_frame[3];
5710   FRAME_UPDATE_TYPE update_type;
5711 } GF_PICTURE;
5712
5713 static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
5714                             const GF_GROUP *gf_group, int *tpl_group_frames) {
5715   VP9_COMMON *cm = &cpi->common;
5716   int frame_idx = 0;
5717   int i;
5718   int gld_index = -1;
5719   int alt_index = -1;
5720   int lst_index = -1;
5721   int arf_index_stack[MAX_ARF_LAYERS];
5722   int arf_stack_size = 0;
5723   int extend_frame_count = 0;
5724   int pframe_qindex = cpi->tpl_stats[2].base_qindex;
5725   int frame_gop_offset = 0;
5726
5727   RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
5728   int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
5729
5730   memset(recon_frame_index, -1, sizeof(recon_frame_index));
5731   stack_init(arf_index_stack, MAX_ARF_LAYERS);
5732
5733   // TODO(jingning): To be used later for gf frame type parsing.
5734   (void)gf_group;
5735
5736   for (i = 0; i < FRAME_BUFFERS; ++i) {
5737     if (frame_bufs[i].ref_count == 0) {
5738       alloc_frame_mvs(cm, i);
5739       if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
5740                                    cm->subsampling_x, cm->subsampling_y,
5741 #if CONFIG_VP9_HIGHBITDEPTH
5742                                    cm->use_highbitdepth,
5743 #endif
5744                                    VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
5745                                    NULL, NULL, NULL))
5746         vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
5747                            "Failed to allocate frame buffer");
5748
5749       recon_frame_index[frame_idx] = i;
5750       ++frame_idx;
5751
5752       if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
5753     }
5754   }
5755
5756   for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
5757     assert(recon_frame_index[i] >= 0);
5758     cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
5759   }
5760
5761   *tpl_group_frames = 0;
5762
5763   // Initialize Golden reference frame.
5764   gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
5765   for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
5766   gf_picture[0].update_type = gf_group->update_type[0];
5767   gld_index = 0;
5768   ++*tpl_group_frames;
5769
5770   // Initialize base layer ARF frame
5771   gf_picture[1].frame = cpi->Source;
5772   gf_picture[1].ref_frame[0] = gld_index;
5773   gf_picture[1].ref_frame[1] = lst_index;
5774   gf_picture[1].ref_frame[2] = alt_index;
5775   gf_picture[1].update_type = gf_group->update_type[1];
5776   alt_index = 1;
5777   ++*tpl_group_frames;
5778
5779   // Initialize P frames
5780   for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
5781     struct lookahead_entry *buf;
5782     frame_gop_offset = gf_group->frame_gop_index[frame_idx];
5783     buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
5784
5785     if (buf == NULL) break;
5786
5787     gf_picture[frame_idx].frame = &buf->img;
5788     gf_picture[frame_idx].ref_frame[0] = gld_index;
5789     gf_picture[frame_idx].ref_frame[1] = lst_index;
5790     gf_picture[frame_idx].ref_frame[2] = alt_index;
5791     gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];
5792
5793     switch (gf_group->update_type[frame_idx]) {
5794       case ARF_UPDATE:
5795         stack_push(arf_index_stack, alt_index, arf_stack_size);
5796         ++arf_stack_size;
5797         alt_index = frame_idx;
5798         break;
5799       case LF_UPDATE: lst_index = frame_idx; break;
5800       case OVERLAY_UPDATE:
5801         gld_index = frame_idx;
5802         alt_index = stack_pop(arf_index_stack, arf_stack_size);
5803         --arf_stack_size;
5804         break;
5805       case USE_BUF_FRAME:
5806         lst_index = alt_index;
5807         alt_index = stack_pop(arf_index_stack, arf_stack_size);
5808         --arf_stack_size;
5809         break;
5810       default: break;
5811     }
5812
5813     ++*tpl_group_frames;
5814
5815     // The length of group of pictures is baseline_gf_interval, plus the
5816     // beginning golden frame from last GOP, plus the last overlay frame in
5817     // the same GOP.
5818     if (frame_idx == gf_group->gf_group_size) break;
5819   }
5820
5821   alt_index = -1;
5822   ++frame_idx;
5823   ++frame_gop_offset;
5824
5825   // Extend two frames outside the current gf group.
5826   for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
5827     struct lookahead_entry *buf =
5828         vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
5829
5830     if (buf == NULL) break;
5831
5832     cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
5833
5834     gf_picture[frame_idx].frame = &buf->img;
5835     gf_picture[frame_idx].ref_frame[0] = gld_index;
5836     gf_picture[frame_idx].ref_frame[1] = lst_index;
5837     gf_picture[frame_idx].ref_frame[2] = alt_index;
5838     gf_picture[frame_idx].update_type = LF_UPDATE;
5839     lst_index = frame_idx;
5840     ++*tpl_group_frames;
5841     ++extend_frame_count;
5842     ++frame_gop_offset;
5843   }
5844 }
5845
5846 static void init_tpl_stats(VP9_COMP *cpi) {
5847   int frame_idx;
5848   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
5849     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
5850     memset(tpl_frame->tpl_stats_ptr, 0,
5851            tpl_frame->height * tpl_frame->width *
5852                sizeof(*tpl_frame->tpl_stats_ptr));
5853     tpl_frame->is_valid = 0;
5854   }
5855 }
5856
5857 #if CONFIG_NON_GREEDY_MV
5858 static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
5859                                          MotionField *motion_field,
5860                                          int frame_idx, uint8_t *cur_frame_buf,
5861                                          uint8_t *ref_frame_buf, int stride,
5862                                          BLOCK_SIZE bsize, int mi_row,
5863                                          int mi_col, MV *mv) {
5864   MACROBLOCK *const x = &td->mb;
5865   MACROBLOCKD *const xd = &x->e_mbd;
5866   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5867   int step_param;
5868   uint32_t bestsme = UINT_MAX;
5869   const MvLimits tmp_mv_limits = x->mv_limits;
5870   // lambda is used to adjust the importance of motion vector consitency.
5871   // TODO(angiebird): Figure out lambda's proper value.
5872   const int lambda = cpi->tpl_stats[frame_idx].lambda;
5873   int_mv nb_full_mvs[NB_MVS_NUM];
5874   int nb_full_mv_num;
5875
5876   MV best_ref_mv1 = { 0, 0 };
5877   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
5878
5879   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
5880   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
5881
5882   // Setup frame pointers
5883   x->plane[0].src.buf = cur_frame_buf;
5884   x->plane[0].src.stride = stride;
5885   xd->plane[0].pre[0].buf = ref_frame_buf;
5886   xd->plane[0].pre[0].stride = stride;
5887
5888   step_param = mv_sf->reduce_first_step_size;
5889   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
5890
5891   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
5892
5893   nb_full_mv_num =
5894       vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
5895   vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
5896                              lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
5897
5898   /* restore UMV window */
5899   x->mv_limits = tmp_mv_limits;
5900
5901   return bestsme;
5902 }
5903
5904 static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
5905                                         uint8_t *cur_frame_buf,
5906                                         uint8_t *ref_frame_buf, int stride,
5907                                         BLOCK_SIZE bsize, MV *mv) {
5908   MACROBLOCK *const x = &td->mb;
5909   MACROBLOCKD *const xd = &x->e_mbd;
5910   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5911   uint32_t bestsme = UINT_MAX;
5912   uint32_t distortion;
5913   uint32_t sse;
5914   int cost_list[5];
5915
5916   MV best_ref_mv1 = { 0, 0 };
5917
5918   // Setup frame pointers
5919   x->plane[0].src.buf = cur_frame_buf;
5920   x->plane[0].src.stride = stride;
5921   xd->plane[0].pre[0].buf = ref_frame_buf;
5922   xd->plane[0].pre[0].stride = stride;
5923
5924   // TODO(yunqing): may use higher tap interp filter than 2 taps.
5925   // Ignore mv costing by sending NULL pointer instead of cost array
5926   bestsme = cpi->find_fractional_mv_step(
5927       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
5928       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
5929       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
5930       USE_2_TAPS);
5931
5932   return bestsme;
5933 }
5934
5935 #else  // CONFIG_NON_GREEDY_MV
5936 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
5937                                               uint8_t *cur_frame_buf,
5938                                               uint8_t *ref_frame_buf,
5939                                               int stride, BLOCK_SIZE bsize,
5940                                               MV *mv) {
5941   MACROBLOCK *const x = &td->mb;
5942   MACROBLOCKD *const xd = &x->e_mbd;
5943   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5944   const SEARCH_METHODS search_method = NSTEP;
5945   int step_param;
5946   int sadpb = x->sadperbit16;
5947   uint32_t bestsme = UINT_MAX;
5948   uint32_t distortion;
5949   uint32_t sse;
5950   int cost_list[5];
5951   const MvLimits tmp_mv_limits = x->mv_limits;
5952
5953   MV best_ref_mv1 = { 0, 0 };
5954   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
5955
5956   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
5957   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
5958
5959   // Setup frame pointers
5960   x->plane[0].src.buf = cur_frame_buf;
5961   x->plane[0].src.stride = stride;
5962   xd->plane[0].pre[0].buf = ref_frame_buf;
5963   xd->plane[0].pre[0].stride = stride;
5964
5965   step_param = mv_sf->reduce_first_step_size;
5966   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
5967
5968   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
5969
5970   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
5971                         search_method, sadpb, cond_cost_list(cpi, cost_list),
5972                         &best_ref_mv1, mv, 0, 0);
5973
5974   /* restore UMV window */
5975   x->mv_limits = tmp_mv_limits;
5976
5977   // TODO(yunqing): may use higher tap interp filter than 2 taps.
5978   // Ignore mv costing by sending NULL pointer instead of cost array
5979   bestsme = cpi->find_fractional_mv_step(
5980       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
5981       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
5982       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
5983       USE_2_TAPS);
5984
5985   return bestsme;
5986 }
5987 #endif
5988
5989 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
5990                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
5991   int width = 0, height = 0;
5992   int bw = 4 << b_width_log2_lookup[bsize];
5993   int bh = 4 << b_height_log2_lookup[bsize];
5994
5995   switch (block) {
5996     case 0:
5997       width = grid_pos_col + bw - ref_pos_col;
5998       height = grid_pos_row + bh - ref_pos_row;
5999       break;
6000     case 1:
6001       width = ref_pos_col + bw - grid_pos_col;
6002       height = grid_pos_row + bh - ref_pos_row;
6003       break;
6004     case 2:
6005       width = grid_pos_col + bw - ref_pos_col;
6006       height = ref_pos_row + bh - grid_pos_row;
6007       break;
6008     case 3:
6009       width = ref_pos_col + bw - grid_pos_col;
6010       height = ref_pos_row + bh - grid_pos_row;
6011       break;
6012     default: assert(0);
6013   }
6014
6015   return width * height;
6016 }
6017
6018 static int round_floor(int ref_pos, int bsize_pix) {
6019   int round;
6020   if (ref_pos < 0)
6021     round = -(1 + (-ref_pos - 1) / bsize_pix);
6022   else
6023     round = ref_pos / bsize_pix;
6024
6025   return round;
6026 }
6027
6028 static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
6029                             BLOCK_SIZE bsize, int stride) {
6030   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6031   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6032   const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
6033   int idx, idy;
6034
6035   for (idy = 0; idy < mi_height; ++idy) {
6036     for (idx = 0; idx < mi_width; ++idx) {
6037       TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
6038       const int64_t mc_flow = tpl_ptr->mc_flow;
6039       const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
6040       *tpl_ptr = *src_stats;
6041       tpl_ptr->mc_flow = mc_flow;
6042       tpl_ptr->mc_ref_cost = mc_ref_cost;
6043       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
6044     }
6045   }
6046 }
6047
6048 static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6049                                int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6050   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
6051   TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
6052   MV mv = tpl_stats->mv.as_mv;
6053   int mv_row = mv.row >> 3;
6054   int mv_col = mv.col >> 3;
6055
6056   int ref_pos_row = mi_row * MI_SIZE + mv_row;
6057   int ref_pos_col = mi_col * MI_SIZE + mv_col;
6058
6059   const int bw = 4 << b_width_log2_lookup[bsize];
6060   const int bh = 4 << b_height_log2_lookup[bsize];
6061   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6062   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6063   const int pix_num = bw * bh;
6064
6065   // top-left on grid block location in pixel
6066   int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
6067   int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
6068   int block;
6069
6070   for (block = 0; block < 4; ++block) {
6071     int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
6072     int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
6073
6074     if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
6075         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
6076       int overlap_area = get_overlap_area(
6077           grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
6078       int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
6079       int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
6080
6081       int64_t mc_flow = tpl_stats->mc_dep_cost -
6082                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
6083                             tpl_stats->intra_cost;
6084
6085       int idx, idy;
6086
6087       for (idy = 0; idy < mi_height; ++idy) {
6088         for (idx = 0; idx < mi_width; ++idx) {
6089           TplDepStats *des_stats =
6090               &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
6091                          (ref_mi_col + idx)];
6092
6093           des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
6094           des_stats->mc_ref_cost +=
6095               ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
6096               pix_num;
6097           assert(overlap_area >= 0);
6098         }
6099       }
6100     }
6101   }
6102 }
6103
6104 static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6105                              int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6106   int idx, idy;
6107   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6108   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6109
6110   for (idy = 0; idy < mi_height; ++idy) {
6111     for (idx = 0; idx < mi_width; ++idx) {
6112       TplDepStats *tpl_ptr =
6113           &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
6114       tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
6115                          BLOCK_8X8);
6116     }
6117   }
6118 }
6119
6120 static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
6121                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
6122                                TX_SIZE tx_size, int64_t *recon_error,
6123                                int64_t *sse) {
6124   MACROBLOCKD *const xd = &x->e_mbd;
6125   const struct macroblock_plane *const p = &x->plane[plane];
6126   const struct macroblockd_plane *const pd = &xd->plane[plane];
6127   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
6128   uint16_t eob;
6129   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
6130   const int shift = tx_size == TX_32X32 ? 0 : 2;
6131
6132 #if CONFIG_VP9_HIGHBITDEPTH
6133   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6134     vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6135                                  p->quant_fp, qcoeff, dqcoeff, pd->dequant,
6136                                  &eob, scan_order->scan, scan_order->iscan);
6137   } else {
6138     vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6139                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
6140                           scan_order->scan, scan_order->iscan);
6141   }
6142 #else
6143   vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
6144                         qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
6145                         scan_order->iscan);
6146 #endif  // CONFIG_VP9_HIGHBITDEPTH
6147
6148   *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
6149   *recon_error = VPXMAX(*recon_error, 1);
6150
6151   *sse = (*sse) >> shift;
6152   *sse = VPXMAX(*sse, 1);
6153 }
6154
6155 #if CONFIG_VP9_HIGHBITDEPTH
6156 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6157                          TX_SIZE tx_size) {
6158   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
6159   switch (tx_size) {
6160     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
6161     case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
6162     case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
6163     default: assert(0);
6164   }
6165 }
6166 #endif  // CONFIG_VP9_HIGHBITDEPTH
6167
6168 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6169                   TX_SIZE tx_size) {
6170   switch (tx_size) {
6171     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
6172     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
6173     case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
6174     default: assert(0);
6175   }
6176 }
6177
6178 static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
6179                           int mi_col) {
6180   x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6181   x->mv_limits.row_max =
6182       (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
6183   x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6184   x->mv_limits.col_max =
6185       ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
6186 }
6187
6188 static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
6189                             struct scale_factors *sf, GF_PICTURE *gf_picture,
6190                             int frame_idx, TplDepFrame *tpl_frame,
6191                             int16_t *src_diff, tran_low_t *coeff,
6192                             tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
6193                             int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
6194                             YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
6195                             int64_t *recon_error, int64_t *sse) {
6196   VP9_COMMON *cm = &cpi->common;
6197   ThreadData *td = &cpi->td;
6198
6199   const int bw = 4 << b_width_log2_lookup[bsize];
6200   const int bh = 4 << b_height_log2_lookup[bsize];
6201   const int pix_num = bw * bh;
6202   int best_rf_idx = -1;
6203   int_mv best_mv;
6204   int64_t best_inter_cost = INT64_MAX;
6205   int64_t inter_cost;
6206   int rf_idx;
6207   const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
6208
6209   int64_t best_intra_cost = INT64_MAX;
6210   int64_t intra_cost;
6211   PREDICTION_MODE mode;
6212   int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6213   MODE_INFO mi_above, mi_left;
6214   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6215   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6216   TplDepStats *tpl_stats =
6217       &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6218
6219   xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
6220   xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
6221   xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
6222   xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
6223   xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
6224   xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
6225
6226   // Intra prediction search
6227   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
6228     uint8_t *src, *dst;
6229     int src_stride, dst_stride;
6230
6231     src = xd->cur_buf->y_buffer + mb_y_offset;
6232     src_stride = xd->cur_buf->y_stride;
6233
6234     dst = &predictor[0];
6235     dst_stride = bw;
6236
6237     xd->mi[0]->sb_type = bsize;
6238     xd->mi[0]->ref_frame[0] = INTRA_FRAME;
6239
6240     vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
6241                             src_stride, dst, dst_stride, 0, 0, 0);
6242
6243 #if CONFIG_VP9_HIGHBITDEPTH
6244     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6245       vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6246                                 dst_stride, xd->bd);
6247       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6248       intra_cost = vpx_highbd_satd(coeff, pix_num);
6249     } else {
6250       vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6251                          dst_stride);
6252       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6253       intra_cost = vpx_satd(coeff, pix_num);
6254     }
6255 #else
6256     vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
6257     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6258     intra_cost = vpx_satd(coeff, pix_num);
6259 #endif  // CONFIG_VP9_HIGHBITDEPTH
6260
6261     if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
6262   }
6263
6264   // Motion compensated prediction
6265   best_mv.as_int = 0;
6266
6267   set_mv_limits(cm, x, mi_row, mi_col);
6268
6269   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6270     int_mv mv;
6271 #if CONFIG_NON_GREEDY_MV
6272     MotionField *motion_field;
6273 #endif
6274     if (ref_frame[rf_idx] == NULL) continue;
6275
6276 #if CONFIG_NON_GREEDY_MV
6277     (void)td;
6278     motion_field = vp9_motion_field_info_get_motion_field(
6279         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6280     mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6281 #else
6282     motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
6283                                   ref_frame[rf_idx]->y_buffer + mb_y_offset,
6284                                   xd->cur_buf->y_stride, bsize, &mv.as_mv);
6285 #endif
6286
6287 #if CONFIG_VP9_HIGHBITDEPTH
6288     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6289       vp9_highbd_build_inter_predictor(
6290           CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
6291           ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
6292           &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
6293           mi_row * MI_SIZE, xd->bd);
6294       vpx_highbd_subtract_block(
6295           bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
6296           xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
6297       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6298       inter_cost = vpx_highbd_satd(coeff, pix_num);
6299     } else {
6300       vp9_build_inter_predictor(
6301           ref_frame[rf_idx]->y_buffer + mb_y_offset,
6302           ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
6303           0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
6304       vpx_subtract_block(bh, bw, src_diff, bw,
6305                          xd->cur_buf->y_buffer + mb_y_offset,
6306                          xd->cur_buf->y_stride, &predictor[0], bw);
6307       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6308       inter_cost = vpx_satd(coeff, pix_num);
6309     }
6310 #else
6311     vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
6312                               ref_frame[rf_idx]->y_stride, &predictor[0], bw,
6313                               &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
6314                               mi_col * MI_SIZE, mi_row * MI_SIZE);
6315     vpx_subtract_block(bh, bw, src_diff, bw,
6316                        xd->cur_buf->y_buffer + mb_y_offset,
6317                        xd->cur_buf->y_stride, &predictor[0], bw);
6318     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6319     inter_cost = vpx_satd(coeff, pix_num);
6320 #endif
6321
6322     if (inter_cost < best_inter_cost) {
6323       best_rf_idx = rf_idx;
6324       best_inter_cost = inter_cost;
6325       best_mv.as_int = mv.as_int;
6326       get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
6327                          sse);
6328     }
6329   }
6330   best_intra_cost = VPXMAX(best_intra_cost, 1);
6331   best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
6332   tpl_stats->inter_cost = VPXMAX(
6333       1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6334   tpl_stats->intra_cost = VPXMAX(
6335       1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6336   tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
6337   tpl_stats->mv.as_int = best_mv.as_int;
6338 }
6339
6340 #if CONFIG_NON_GREEDY_MV
6341 static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
6342                                   int frame_idx, int rf_idx, int mi_row,
6343                                   int mi_col, struct buf_2d *src,
6344                                   struct buf_2d *pre) {
6345   const int mb_y_offset =
6346       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6347   YV12_BUFFER_CONFIG *ref_frame = NULL;
6348   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6349   if (ref_frame_idx != -1) {
6350     ref_frame = gf_picture[ref_frame_idx].frame;
6351     src->buf = xd->cur_buf->y_buffer + mb_y_offset;
6352     src->stride = xd->cur_buf->y_stride;
6353     pre->buf = ref_frame->y_buffer + mb_y_offset;
6354     pre->stride = ref_frame->y_stride;
6355     assert(src->stride == pre->stride);
6356     return 1;
6357   } else {
6358     printf("invalid ref_frame_idx");
6359     assert(ref_frame_idx != -1);
6360     return 0;
6361   }
6362 }
6363
6364 #define kMvPreCheckLines 5
6365 #define kMvPreCheckSize 15
6366
6367 #define MV_REF_POS_NUM 3
6368 POSITION mv_ref_pos[MV_REF_POS_NUM] = {
6369   { -1, 0 },
6370   { 0, -1 },
6371   { -1, -1 },
6372 };
6373
6374 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
6375                              int mi_col) {
6376   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
6377 }
6378
6379 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
6380                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
6381   int i;
6382   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6383   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6384   int_mv nearest_mv, near_mv, invalid_mv;
6385   nearest_mv.as_int = INVALID_MV;
6386   near_mv.as_int = INVALID_MV;
6387   invalid_mv.as_int = INVALID_MV;
6388   for (i = 0; i < MV_REF_POS_NUM; ++i) {
6389     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
6390     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
6391     assert(mv_ref_pos[i].row <= 0);
6392     assert(mv_ref_pos[i].col <= 0);
6393     if (nb_row >= 0 && nb_col >= 0) {
6394       if (nearest_mv.as_int == INVALID_MV) {
6395         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6396       } else {
6397         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6398         if (mv.as_int == nearest_mv.as_int) {
6399           continue;
6400         } else {
6401           near_mv = mv;
6402           break;
6403         }
6404       }
6405     }
6406   }
6407   if (nearest_mv.as_int == INVALID_MV) {
6408     nearest_mv.as_mv.row = 0;
6409     nearest_mv.as_mv.col = 0;
6410   }
6411   if (near_mv.as_int == INVALID_MV) {
6412     near_mv.as_mv.row = 0;
6413     near_mv.as_mv.col = 0;
6414   }
6415   if (mv_mode == NEAREST_MV_MODE) {
6416     return nearest_mv;
6417   }
6418   if (mv_mode == NEAR_MV_MODE) {
6419     return near_mv;
6420   }
6421   assert(0);
6422   return invalid_mv;
6423 }
6424
6425 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
6426                                   MotionField *motion_field,
6427                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
6428                                   int mi_row, int mi_col) {
6429   int_mv mv;
6430   switch (mv_mode) {
6431     case ZERO_MV_MODE:
6432       mv.as_mv.row = 0;
6433       mv.as_mv.col = 0;
6434       break;
6435     case NEW_MV_MODE:
6436       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6437       break;
6438     case NEAREST_MV_MODE:
6439       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6440       break;
6441     case NEAR_MV_MODE:
6442       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6443       break;
6444     default:
6445       mv.as_int = INVALID_MV;
6446       assert(0);
6447       break;
6448   }
6449   return mv;
6450 }
6451
6452 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
6453                           GF_PICTURE *gf_picture, MotionField *motion_field,
6454                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6455                           BLOCK_SIZE bsize, int mi_row, int mi_col,
6456                           int_mv *mv) {
6457   uint32_t sse;
6458   struct buf_2d src;
6459   struct buf_2d pre;
6460   MV full_mv;
6461   *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
6462                             mi_row, mi_col);
6463   full_mv = get_full_mv(&mv->as_mv);
6464   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
6465                              &src, &pre)) {
6466     // TODO(angiebird): Consider subpixel when computing the sse.
6467     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
6468                           pre.stride, &sse);
6469     return (double)(sse << VP9_DIST_SCALE_LOG2);
6470   } else {
6471     assert(0);
6472     return 0;
6473   }
6474 }
6475
6476 static int get_mv_mode_cost(int mv_mode) {
6477   // TODO(angiebird): The probabilities are roughly inferred from
6478   // default_inter_mode_probs. Check if there is a better way to set the
6479   // probabilities.
6480   const int zero_mv_prob = 16;
6481   const int new_mv_prob = 24 * 1;
6482   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
6483   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
6484   switch (mv_mode) {
6485     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
6486     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
6487     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6488     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6489     default: assert(0); return -1;
6490   }
6491 }
6492
6493 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
6494   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
6495                         log2(1 + abs(new_mv->col - ref_mv->col));
6496   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
6497   return mv_diff_cost;
6498 }
6499 static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
6500                           TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
6501                           int mi_col) {
6502   double mv_cost = get_mv_mode_cost(mv_mode);
6503   if (mv_mode == NEW_MV_MODE) {
6504     MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
6505                                     bsize, mi_row, mi_col)
6506                     .as_mv;
6507     MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
6508                                         tpl_frame, bsize, mi_row, mi_col)
6509                         .as_mv;
6510     MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
6511                                      bsize, mi_row, mi_col)
6512                      .as_mv;
6513     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
6514     double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
6515     mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
6516   }
6517   return mv_cost;
6518 }
6519
6520 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
6521                            GF_PICTURE *gf_picture, MotionField *motion_field,
6522                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6523                            BLOCK_SIZE bsize, int mi_row, int mi_col,
6524                            int_mv *mv) {
6525   MACROBLOCKD *xd = &x->e_mbd;
6526   double mv_dist =
6527       get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
6528                   tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
6529   double mv_cost =
6530       get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
6531   double mult = 180;
6532
6533   return mv_cost + mult * log2f(1 + mv_dist);
6534 }
6535
6536 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6537                                  GF_PICTURE *gf_picture,
6538                                  MotionField *motion_field, int frame_idx,
6539                                  TplDepFrame *tpl_frame, int rf_idx,
6540                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
6541                                  double *rd, int_mv *mv) {
6542   int best_mv_mode = ZERO_MV_MODE;
6543   int update = 0;
6544   int mv_mode;
6545   *rd = 0;
6546   for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
6547     double this_rd;
6548     int_mv this_mv;
6549     if (mv_mode == NEW_MV_MODE) {
6550       continue;
6551     }
6552     this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
6553                            tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
6554     if (update == 0) {
6555       *rd = this_rd;
6556       *mv = this_mv;
6557       best_mv_mode = mv_mode;
6558       update = 1;
6559     } else {
6560       if (this_rd < *rd) {
6561         *rd = this_rd;
6562         *mv = this_mv;
6563         best_mv_mode = mv_mode;
6564       }
6565     }
6566   }
6567   return best_mv_mode;
6568 }
6569
6570 static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6571                             GF_PICTURE *gf_picture, MotionField *motion_field,
6572                             int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6573                             BLOCK_SIZE bsize, int mi_row, int mi_col) {
6574   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6575   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6576   int tmp_mv_mode_arr[kMvPreCheckSize];
6577   int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
6578   double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
6579   int_mv *select_mv_arr = cpi->select_mv_arr;
6580   int_mv tmp_select_mv_arr[kMvPreCheckSize];
6581   int stride = tpl_frame->stride;
6582   double new_mv_rd = 0;
6583   double no_new_mv_rd = 0;
6584   double this_new_mv_rd = 0;
6585   double this_no_new_mv_rd = 0;
6586   int idx;
6587   int tmp_idx;
6588   assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);
6589
6590   // no new mv
6591   // diagnal scan order
6592   tmp_idx = 0;
6593   for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6594     int r;
6595     for (r = 0; r <= idx; ++r) {
6596       int c = idx - r;
6597       int nb_row = mi_row + r * mi_height;
6598       int nb_col = mi_col + c * mi_width;
6599       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6600         double this_rd;
6601         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6602         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6603             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6604             bsize, nb_row, nb_col, &this_rd, mv);
6605         if (r == 0 && c == 0) {
6606           this_no_new_mv_rd = this_rd;
6607         }
6608         no_new_mv_rd += this_rd;
6609         tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
6610         tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
6611         ++tmp_idx;
6612       }
6613     }
6614   }
6615
6616   // new mv
6617   mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
6618   this_new_mv_rd = eval_mv_mode(
6619       NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6620       rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
6621   new_mv_rd = this_new_mv_rd;
6622   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
6623   // beforehand.
6624   for (idx = 1; idx < kMvPreCheckLines; ++idx) {
6625     int r;
6626     for (r = 0; r <= idx; ++r) {
6627       int c = idx - r;
6628       int nb_row = mi_row + r * mi_height;
6629       int nb_col = mi_col + c * mi_width;
6630       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6631         double this_rd;
6632         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6633         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6634             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6635             bsize, nb_row, nb_col, &this_rd, mv);
6636         new_mv_rd += this_rd;
6637       }
6638     }
6639   }
6640
6641   // update best_mv_mode
6642   tmp_idx = 0;
6643   if (no_new_mv_rd < new_mv_rd) {
6644     for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6645       int r;
6646       for (r = 0; r <= idx; ++r) {
6647         int c = idx - r;
6648         int nb_row = mi_row + r * mi_height;
6649         int nb_col = mi_col + c * mi_width;
6650         if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6651           mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
6652           select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
6653           ++tmp_idx;
6654         }
6655       }
6656     }
6657     rd_diff_arr[mi_row * stride + mi_col] = 0;
6658   } else {
6659     rd_diff_arr[mi_row * stride + mi_col] =
6660         (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
6661   }
6662 }
6663
6664 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
6665                                 GF_PICTURE *gf_picture,
6666                                 MotionField *motion_field, int frame_idx,
6667                                 TplDepFrame *tpl_frame, int rf_idx,
6668                                 BLOCK_SIZE bsize) {
6669   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6670   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6671   const int unit_rows = tpl_frame->mi_rows / mi_height;
6672   const int unit_cols = tpl_frame->mi_cols / mi_width;
6673   const int max_diagonal_lines = unit_rows + unit_cols - 1;
6674   int idx;
6675   for (idx = 0; idx < max_diagonal_lines; ++idx) {
6676     int r;
6677     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
6678          ++r) {
6679       int c = idx - r;
6680       int mi_row = r * mi_height;
6681       int mi_col = c * mi_width;
6682       assert(c >= 0 && c < unit_cols);
6683       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
6684       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
6685       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6686                       rf_idx, bsize, mi_row, mi_col);
6687     }
6688   }
6689 }
6690
6691 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
6692                              MotionField *motion_field, int frame_idx,
6693                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
6694                              int mi_row, int mi_col) {
6695   VP9_COMMON *cm = &cpi->common;
6696   MACROBLOCK *x = &td->mb;
6697   MACROBLOCKD *xd = &x->e_mbd;
6698   const int mb_y_offset =
6699       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6700   assert(ref_frame != NULL);
6701   set_mv_limits(cm, x, mi_row, mi_col);
6702   {
6703     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6704     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
6705     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
6706     const int stride = xd->cur_buf->y_stride;
6707     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
6708                              ref_frame_buf, stride, bsize, mi_row, mi_col,
6709                              &mv.as_mv);
6710     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
6711                             bsize, &mv.as_mv);
6712     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
6713   }
6714 }
6715
6716 static void build_motion_field(
6717     VP9_COMP *cpi, int frame_idx,
6718     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
6719   VP9_COMMON *cm = &cpi->common;
6720   ThreadData *td = &cpi->td;
6721   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6722   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6723   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6724   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
6725   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
6726   int mi_row, mi_col;
6727   int rf_idx;
6728
6729   tpl_frame->lambda = (pw * ph) >> 2;
6730   assert(pw * ph == tpl_frame->lambda << 2);
6731
6732   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6733     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6734         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6735     if (ref_frame[rf_idx] == NULL) {
6736       continue;
6737     }
6738     vp9_motion_field_reset_mvs(motion_field);
6739     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6740       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6741         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
6742                          bsize, mi_row, mi_col);
6743       }
6744     }
6745   }
6746 }
6747 #endif  // CONFIG_NON_GREEDY_MV
6748
6749 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
6750                               int frame_idx, BLOCK_SIZE bsize) {
6751   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6752   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
6753   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
6754
6755   VP9_COMMON *cm = &cpi->common;
6756   struct scale_factors sf;
6757   int rdmult, idx;
6758   ThreadData *td = &cpi->td;
6759   MACROBLOCK *x = &td->mb;
6760   MACROBLOCKD *xd = &x->e_mbd;
6761   int mi_row, mi_col;
6762
6763 #if CONFIG_VP9_HIGHBITDEPTH
6764   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
6765   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
6766   uint8_t *predictor;
6767 #else
6768   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
6769 #endif
6770   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
6771   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
6772   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
6773   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
6774
6775   const TX_SIZE tx_size = max_txsize_lookup[bsize];
6776   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6777   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6778   int64_t recon_error, sse;
6779 #if CONFIG_NON_GREEDY_MV
6780   int square_block_idx;
6781   int rf_idx;
6782 #endif
6783
6784   // Setup scaling factor
6785 #if CONFIG_VP9_HIGHBITDEPTH
6786   vp9_setup_scale_factors_for_frame(
6787       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6788       this_frame->y_crop_width, this_frame->y_crop_height,
6789       cpi->common.use_highbitdepth);
6790
6791   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6792     predictor = CONVERT_TO_BYTEPTR(predictor16);
6793   else
6794     predictor = predictor8;
6795 #else
6796   vp9_setup_scale_factors_for_frame(
6797       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6798       this_frame->y_crop_width, this_frame->y_crop_height);
6799 #endif  // CONFIG_VP9_HIGHBITDEPTH
6800
6801   // Prepare reference frame pointers. If any reference frame slot is
6802   // unavailable, the pointer will be set to Null.
6803   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
6804     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
6805     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
6806   }
6807
6808   xd->mi = cm->mi_grid_visible;
6809   xd->mi[0] = cm->mi;
6810   xd->cur_buf = this_frame;
6811
6812   // Get rd multiplier set up.
6813   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
6814   set_error_per_bit(&cpi->td.mb, rdmult);
6815   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
6816
6817   tpl_frame->is_valid = 1;
6818
6819   cm->base_qindex = tpl_frame->base_qindex;
6820   vp9_frame_init_quantizer(cpi);
6821
6822 #if CONFIG_NON_GREEDY_MV
6823   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
6824        ++square_block_idx) {
6825     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
6826     build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
6827   }
6828   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6829     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6830     if (ref_frame_idx != -1) {
6831       MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6832           &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6833       predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
6834                           tpl_frame, rf_idx, bsize);
6835     }
6836   }
6837 #endif
6838
6839   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6840     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6841       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
6842                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
6843                       tx_size, ref_frame, predictor, &recon_error, &sse);
6844       // Motion flow dependency dispenser.
6845       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
6846                       tpl_frame->stride);
6847
6848       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
6849                        bsize);
6850     }
6851   }
6852 }
6853
6854 #if CONFIG_NON_GREEDY_MV
6855 #define DUMP_TPL_STATS 0
6856 #if DUMP_TPL_STATS
6857 static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
6858   int i, j;
6859   printf("%d %d\n", h, w);
6860   for (i = 0; i < h; ++i) {
6861     for (j = 0; j < w; ++j) {
6862       printf("%d ", buf[(row + i) * stride + col + j]);
6863     }
6864   }
6865   printf("\n");
6866 }
6867
6868 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
6869   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
6870            frame_buf->y_width);
6871   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
6872            frame_buf->uv_height, frame_buf->uv_width);
6873   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
6874            frame_buf->uv_height, frame_buf->uv_width);
6875 }
6876
6877 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
6878                            const GF_GROUP *gf_group,
6879                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
6880   int frame_idx;
6881   const VP9_COMMON *cm = &cpi->common;
6882   int rf_idx;
6883   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
6884     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6885       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6886       int mi_row, mi_col;
6887       int ref_frame_idx;
6888       const int mi_height = num_8x8_blocks_high_lookup[bsize];
6889       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6890       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6891       if (ref_frame_idx != -1) {
6892         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
6893         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
6894         const int ref_gf_frame_offset =
6895             gf_group->frame_gop_index[ref_frame_idx];
6896         printf("=\n");
6897         printf(
6898             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
6899             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
6900             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
6901             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
6902         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
6903           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
6904             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
6905               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
6906                                                        frame_idx, rf_idx, bsize,
6907                                                        mi_row, mi_col);
6908               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
6909                      mv.as_mv.col);
6910             }
6911           }
6912         }
6913         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
6914           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
6915             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
6916               const TplDepStats *tpl_ptr =
6917                   &tpl_frame
6918                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6919               printf("%f ", tpl_ptr->feature_score);
6920             }
6921           }
6922         }
6923         printf("\n");
6924
6925         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6926           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6927             const int mv_mode =
6928                 tpl_frame
6929                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
6930             printf("%d ", mv_mode);
6931           }
6932         }
6933         printf("\n");
6934
6935         dump_frame_buf(gf_picture[frame_idx].frame);
6936         dump_frame_buf(ref_frame_buf);
6937       }
6938     }
6939   }
6940 }
6941 #endif  // DUMP_TPL_STATS
6942 #endif  // CONFIG_NON_GREEDY_MV
6943
6944 static void init_tpl_buffer(VP9_COMP *cpi) {
6945   VP9_COMMON *cm = &cpi->common;
6946   int frame;
6947
6948   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
6949   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
6950 #if CONFIG_NON_GREEDY_MV
6951   int rf_idx;
6952
6953   vpx_free(cpi->select_mv_arr);
6954   CHECK_MEM_ERROR(
6955       cm, cpi->select_mv_arr,
6956       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
6957 #endif
6958
6959   // TODO(jingning): Reduce the actual memory use for tpl model build up.
6960   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
6961     if (cpi->tpl_stats[frame].width >= mi_cols &&
6962         cpi->tpl_stats[frame].height >= mi_rows &&
6963         cpi->tpl_stats[frame].tpl_stats_ptr)
6964       continue;
6965
6966 #if CONFIG_NON_GREEDY_MV
6967     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6968       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
6969       CHECK_MEM_ERROR(
6970           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
6971           vpx_calloc(mi_rows * mi_cols * 4,
6972                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
6973       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
6974       CHECK_MEM_ERROR(
6975           cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
6976           vpx_calloc(mi_rows * mi_cols * 4,
6977                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
6978     }
6979 #endif
6980     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
6981     CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
6982                     vpx_calloc(mi_rows * mi_cols,
6983                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
6984     cpi->tpl_stats[frame].is_valid = 0;
6985     cpi->tpl_stats[frame].width = mi_cols;
6986     cpi->tpl_stats[frame].height = mi_rows;
6987     cpi->tpl_stats[frame].stride = mi_cols;
6988     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
6989     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
6990   }
6991
6992   for (frame = 0; frame < REF_FRAMES; ++frame) {
6993     cpi->enc_frame_buf[frame].mem_valid = 0;
6994     cpi->enc_frame_buf[frame].released = 1;
6995   }
6996 }
6997
6998 static void free_tpl_buffer(VP9_COMP *cpi) {
6999   int frame;
7000 #if CONFIG_NON_GREEDY_MV
7001   vp9_free_motion_field_info(&cpi->motion_field_info);
7002   vpx_free(cpi->select_mv_arr);
7003 #endif
7004   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7005 #if CONFIG_NON_GREEDY_MV
7006     int rf_idx;
7007     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7008       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7009       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7010     }
7011 #endif
7012     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7013     cpi->tpl_stats[frame].is_valid = 0;
7014   }
7015 }
7016
7017 static void setup_tpl_stats(VP9_COMP *cpi) {
7018   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
7019   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7020   int tpl_group_frames = 0;
7021   int frame_idx;
7022   cpi->tpl_bsize = BLOCK_32X32;
7023
7024   init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
7025
7026   init_tpl_stats(cpi);
7027
7028   // Backward propagation from tpl_group_frames to 1.
7029   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
7030     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
7031     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
7032   }
7033 #if CONFIG_NON_GREEDY_MV
7034   cpi->tpl_ready = 1;
7035 #if DUMP_TPL_STATS
7036   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
7037 #endif  // DUMP_TPL_STATS
7038 #endif  // CONFIG_NON_GREEDY_MV
7039 }
7040
7041 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
7042                             size_t *size, uint8_t *dest, int64_t *time_stamp,
7043                             int64_t *time_end, int flush) {
7044   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
7045   VP9_COMMON *const cm = &cpi->common;
7046   BufferPool *const pool = cm->buffer_pool;
7047   RATE_CONTROL *const rc = &cpi->rc;
7048   struct vpx_usec_timer cmptimer;
7049   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
7050   struct lookahead_entry *last_source = NULL;
7051   struct lookahead_entry *source = NULL;
7052   int arf_src_index;
7053   const int gf_group_index = cpi->twopass.gf_group.index;
7054   int i;
7055
7056   if (is_one_pass_cbr_svc(cpi)) {
7057     vp9_one_pass_cbr_svc_start_layer(cpi);
7058   }
7059
7060   vpx_usec_timer_start(&cmptimer);
7061
7062   vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
7063
7064   // Is multi-arf enabled.
7065   // Note that at the moment multi_arf is only configured for 2 pass VBR and
7066   // will not work properly with svc.
7067   // Enable the Jingning's new "multi_layer_arf" code if "enable_auto_arf"
7068   // is greater than or equal to 2.
7069   if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
7070     cpi->multi_layer_arf = 1;
7071   else
7072     cpi->multi_layer_arf = 0;
7073
7074   // Normal defaults
7075   cm->reset_frame_context = 0;
7076   cm->refresh_frame_context = 1;
7077   if (!is_one_pass_cbr_svc(cpi)) {
7078     cpi->refresh_last_frame = 1;
7079     cpi->refresh_golden_frame = 0;
7080     cpi->refresh_alt_ref_frame = 0;
7081   }
7082
7083   // Should we encode an arf frame.
7084   arf_src_index = get_arf_src_index(cpi);
7085
7086   if (arf_src_index) {
7087     for (i = 0; i <= arf_src_index; ++i) {
7088       struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
7089       // Avoid creating an alt-ref if there's a forced keyframe pending.
7090       if (e == NULL) {
7091         break;
7092       } else if (e->flags == VPX_EFLAG_FORCE_KF) {
7093         arf_src_index = 0;
7094         flush = 1;
7095         break;
7096       }
7097     }
7098   }
7099
7100   // Clear arf index stack before group of pictures processing starts.
7101   if (gf_group_index == 1) {
7102     stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
7103     cpi->twopass.gf_group.stack_size = 0;
7104   }
7105
7106   if (arf_src_index) {
7107     assert(arf_src_index <= rc->frames_to_key);
7108     if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
7109       cpi->alt_ref_source = source;
7110
7111 #if !CONFIG_REALTIME_ONLY
7112       if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
7113           (oxcf->arnr_strength > 0)) {
7114         int bitrate = cpi->rc.avg_frame_bandwidth / 40;
7115         int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;
7116
7117         int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1);
7118         not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME;
7119
7120         // Produce the filtered ARF frame.
7121         vp9_temporal_filter(cpi, arf_src_index);
7122         vpx_extend_frame_borders(&cpi->alt_ref_buffer);
7123
7124         // for small bitrates segmentation overhead usually
7125         // eats all bitrate gain from enabling delta quantizers
7126         if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
7127           vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);
7128
7129         force_src_buffer = &cpi->alt_ref_buffer;
7130       }
7131 #endif
7132       cm->show_frame = 0;
7133       cm->intra_only = 0;
7134       cpi->refresh_alt_ref_frame = 1;
7135       cpi->refresh_golden_frame = 0;
7136       cpi->refresh_last_frame = 0;
7137       rc->is_src_frame_alt_ref = 0;
7138       rc->source_alt_ref_pending = 0;
7139     } else {
7140       rc->source_alt_ref_pending = 0;
7141     }
7142   }
7143
7144   if (!source) {
7145     // Get last frame source.
7146     if (cm->current_video_frame > 0) {
7147       if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
7148         return -1;
7149     }
7150
7151     // Read in the source frame.
7152     if (cpi->use_svc || cpi->svc.set_intra_only_frame)
7153       source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
7154     else
7155       source = vp9_lookahead_pop(cpi->lookahead, flush);
7156
7157     if (source != NULL) {
7158       cm->show_frame = 1;
7159       cm->intra_only = 0;
7160       // If the flags indicate intra frame, but if the current picture is for
7161       // spatial layer above first_spatial_layer_to_encode, it should not be an
7162       // intra picture.
7163       if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
7164           cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
7165         source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
7166       }
7167
7168       // Check to see if the frame should be encoded as an arf overlay.
7169       check_src_altref(cpi, source);
7170     }
7171   }
7172
7173   if (source) {
7174     cpi->un_scaled_source = cpi->Source =
7175         force_src_buffer ? force_src_buffer : &source->img;
7176
7177 #ifdef ENABLE_KF_DENOISE
7178     // Copy of raw source for metrics calculation.
7179     if (is_psnr_calc_enabled(cpi))
7180       vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
7181 #endif
7182
7183     cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
7184
7185     *time_stamp = source->ts_start;
7186     *time_end = source->ts_end;
7187     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
7188   } else {
7189     *size = 0;
7190 #if !CONFIG_REALTIME_ONLY
7191     if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
7192       vp9_end_first_pass(cpi); /* get last stats packet */
7193       cpi->twopass.first_pass_done = 1;
7194     }
7195 #endif  // !CONFIG_REALTIME_ONLY
7196     return -1;
7197   }
7198
7199   if (source->ts_start < cpi->first_time_stamp_ever) {
7200     cpi->first_time_stamp_ever = source->ts_start;
7201     cpi->last_end_time_stamp_seen = source->ts_start;
7202   }
7203
7204   // Clear down mmx registers
7205   vpx_clear_system_state();
7206
7207   // adjust frame rates based on timestamps given
7208   if (cm->show_frame) {
7209     if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
7210         cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
7211       vp9_svc_adjust_frame_rate(cpi);
7212     else
7213       adjust_frame_rate(cpi, source);
7214   }
7215
7216   if (is_one_pass_cbr_svc(cpi)) {
7217     vp9_update_temporal_layer_framerate(cpi);
7218     vp9_restore_layer_context(cpi);
7219   }
7220
7221   // Find a free buffer for the new frame, releasing the reference previously
7222   // held.
7223   if (cm->new_fb_idx != INVALID_IDX) {
7224     --pool->frame_bufs[cm->new_fb_idx].ref_count;
7225   }
7226   cm->new_fb_idx = get_free_fb(cm);
7227
7228   if (cm->new_fb_idx == INVALID_IDX) return -1;
7229
7230   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
7231
7232   // Start with a 0 size frame.
7233   *size = 0;
7234
7235   cpi->frame_flags = *frame_flags;
7236
7237 #if !CONFIG_REALTIME_ONLY
7238   if ((oxcf->pass == 2) && !cpi->use_svc) {
7239     vp9_rc_get_second_pass_params(cpi);
7240   } else if (oxcf->pass == 1) {
7241     set_frame_size(cpi);
7242   }
7243 #endif  // !CONFIG_REALTIME_ONLY
7244
7245   if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
7246       cpi->level_constraint.fail_flag == 0)
7247     level_rc_framerate(cpi, arf_src_index);
7248
7249   if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
7250     for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
7251   }
7252
7253   if (cpi->kmeans_data_arr_alloc == 0) {
7254     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7255     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7256 #if CONFIG_MULTITHREAD
7257     pthread_mutex_init(&cpi->kmeans_mutex, NULL);
7258 #endif
7259     CHECK_MEM_ERROR(
7260         cm, cpi->kmeans_data_arr,
7261         vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
7262     cpi->kmeans_data_stride = mi_cols;
7263     cpi->kmeans_data_arr_alloc = 1;
7264   }
7265
7266 #if CONFIG_NON_GREEDY_MV
7267   {
7268     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
7269     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
7270     Status status = vp9_alloc_motion_field_info(
7271         &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
7272     if (status == STATUS_FAILED) {
7273       vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
7274                          "vp9_alloc_motion_field_info failed");
7275     }
7276   }
7277 #endif  // CONFIG_NON_GREEDY_MV
7278
7279   if (gf_group_index == 1 &&
7280       cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
7281       cpi->sf.enable_tpl_model) {
7282     init_tpl_buffer(cpi);
7283     vp9_estimate_qp_gop(cpi);
7284     setup_tpl_stats(cpi);
7285   }
7286
7287 #if CONFIG_BITSTREAM_DEBUG
7288   assert(cpi->oxcf.max_threads == 0 &&
7289          "bitstream debug tool does not support multithreading");
7290   bitstream_queue_record_write();
7291 #endif
7292 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
7293   bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
7294 #endif
7295
7296   cpi->td.mb.fp_src_pred = 0;
7297 #if CONFIG_REALTIME_ONLY
7298   if (cpi->use_svc) {
7299     SvcEncode(cpi, size, dest, frame_flags);
7300   } else {
7301     // One pass encode
7302     Pass0Encode(cpi, size, dest, frame_flags);
7303   }
7304 #else  // !CONFIG_REALTIME_ONLY
7305   if (oxcf->pass == 1 && !cpi->use_svc) {
7306     const int lossless = is_lossless_requested(oxcf);
7307 #if CONFIG_VP9_HIGHBITDEPTH
7308     if (cpi->oxcf.use_highbitdepth)
7309       cpi->td.mb.fwd_txfm4x4 =
7310           lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
7311     else
7312       cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7313     cpi->td.mb.highbd_inv_txfm_add =
7314         lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
7315 #else
7316     cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
7317 #endif  // CONFIG_VP9_HIGHBITDEPTH
7318     cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
7319     vp9_first_pass(cpi, source);
7320   } else if (oxcf->pass == 2 && !cpi->use_svc) {
7321     Pass2Encode(cpi, size, dest, frame_flags);
7322   } else if (cpi->use_svc) {
7323     SvcEncode(cpi, size, dest, frame_flags);
7324   } else {
7325     // One pass encode
7326     Pass0Encode(cpi, size, dest, frame_flags);
7327   }
7328 #endif  // CONFIG_REALTIME_ONLY
7329
7330   if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
7331
7332   if (cm->refresh_frame_context)
7333     cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
7334
7335   // No frame encoded, or frame was dropped, release scaled references.
7336   if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
7337     release_scaled_references(cpi);
7338   }
7339
7340   if (*size > 0) {
7341     cpi->droppable = !frame_is_reference(cpi);
7342   }
7343
7344   // Save layer specific state.
7345   if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 ||
7346                                     cpi->svc.number_spatial_layers > 1) &&
7347                                    oxcf->pass == 2)) {
7348     vp9_save_layer_context(cpi);
7349   }
7350
7351   vpx_usec_timer_mark(&cmptimer);
7352   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
7353
7354   if (cpi->keep_level_stats && oxcf->pass != 1)
7355     update_level_info(cpi, size, arf_src_index);
7356
7357 #if CONFIG_INTERNAL_STATS
7358
7359   if (oxcf->pass != 1) {
7360     double samples = 0.0;
7361     cpi->bytes += (int)(*size);
7362
7363     if (cm->show_frame) {
7364       uint32_t bit_depth = 8;
7365       uint32_t in_bit_depth = 8;
7366       cpi->count++;
7367 #if CONFIG_VP9_HIGHBITDEPTH
7368       if (cm->use_highbitdepth) {
7369         in_bit_depth = cpi->oxcf.input_bit_depth;
7370         bit_depth = cm->bit_depth;
7371       }
7372 #endif
7373
7374       if (cpi->b_calculate_psnr) {
7375         YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
7376         YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
7377         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
7378         PSNR_STATS psnr;
7379 #if CONFIG_VP9_HIGHBITDEPTH
7380         vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
7381                              in_bit_depth);
7382 #else
7383         vpx_calc_psnr(orig, recon, &psnr);
7384 #endif  // CONFIG_VP9_HIGHBITDEPTH
7385
7386         adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
7387                           psnr.psnr[0], &cpi->psnr);
7388         cpi->total_sq_error += psnr.sse[0];
7389         cpi->total_samples += psnr.samples[0];
7390         samples = psnr.samples[0];
7391
7392         {
7393           PSNR_STATS psnr2;
7394           double frame_ssim2 = 0, weight = 0;
7395 #if CONFIG_VP9_POSTPROC
7396           if (vpx_alloc_frame_buffer(
7397                   pp, recon->y_crop_width, recon->y_crop_height,
7398                   cm->subsampling_x, cm->subsampling_y,
7399 #if CONFIG_VP9_HIGHBITDEPTH
7400                   cm->use_highbitdepth,
7401 #endif
7402                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) {
7403             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
7404                                "Failed to allocate post processing buffer");
7405           }
7406           {
7407             vp9_ppflags_t ppflags;
7408             ppflags.post_proc_flag = VP9D_DEBLOCK;
7409             ppflags.deblocking_level = 0;  // not used in vp9_post_proc_frame()
7410             ppflags.noise_level = 0;       // not used in vp9_post_proc_frame()
7411             vp9_post_proc_frame(cm, pp, &ppflags,
7412                                 cpi->un_scaled_source->y_width);
7413           }
7414 #endif
7415           vpx_clear_system_state();
7416
7417 #if CONFIG_VP9_HIGHBITDEPTH
7418           vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
7419                                cpi->oxcf.input_bit_depth);
7420 #else
7421           vpx_calc_psnr(orig, pp, &psnr2);
7422 #endif  // CONFIG_VP9_HIGHBITDEPTH
7423
7424           cpi->totalp_sq_error += psnr2.sse[0];
7425           cpi->totalp_samples += psnr2.samples[0];
7426           adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
7427                             psnr2.psnr[0], &cpi->psnrp);
7428
7429 #if CONFIG_VP9_HIGHBITDEPTH
7430           if (cm->use_highbitdepth) {
7431             frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth,
7432                                                in_bit_depth);
7433           } else {
7434             frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7435           }
7436 #else
7437           frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
7438 #endif  // CONFIG_VP9_HIGHBITDEPTH
7439
7440           cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
7441           cpi->summed_quality += frame_ssim2 * weight;
7442           cpi->summed_weights += weight;
7443
7444 #if CONFIG_VP9_HIGHBITDEPTH
7445           if (cm->use_highbitdepth) {
7446             frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth,
7447                                                in_bit_depth);
7448           } else {
7449             frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7450           }
7451 #else
7452           frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
7453 #endif  // CONFIG_VP9_HIGHBITDEPTH
7454
7455           cpi->summedp_quality += frame_ssim2 * weight;
7456           cpi->summedp_weights += weight;
7457 #if 0
7458           if (cm->show_frame) {
7459             FILE *f = fopen("q_used.stt", "a");
7460             fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
7461                     cpi->common.current_video_frame, psnr2.psnr[1],
7462                     psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
7463             fclose(f);
7464           }
7465 #endif
7466         }
7467       }
7468       if (cpi->b_calculate_blockiness) {
7469 #if CONFIG_VP9_HIGHBITDEPTH
7470         if (!cm->use_highbitdepth)
7471 #endif
7472         {
7473           double frame_blockiness = vp9_get_blockiness(
7474               cpi->Source->y_buffer, cpi->Source->y_stride,
7475               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
7476               cpi->Source->y_width, cpi->Source->y_height);
7477           cpi->worst_blockiness =
7478               VPXMAX(cpi->worst_blockiness, frame_blockiness);
7479           cpi->total_blockiness += frame_blockiness;
7480         }
7481       }
7482
7483       if (cpi->b_calculate_consistency) {
7484 #if CONFIG_VP9_HIGHBITDEPTH
7485         if (!cm->use_highbitdepth)
7486 #endif
7487         {
7488           double this_inconsistency = vpx_get_ssim_metrics(
7489               cpi->Source->y_buffer, cpi->Source->y_stride,
7490               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
7491               cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
7492               &cpi->metrics, 1);
7493
7494           const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
7495           double consistency =
7496               vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency);
7497           if (consistency > 0.0)
7498             cpi->worst_consistency =
7499                 VPXMIN(cpi->worst_consistency, consistency);
7500           cpi->total_inconsistency += this_inconsistency;
7501         }
7502       }
7503
7504       {
7505         double y, u, v, frame_all;
7506         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
7507                                       &v, bit_depth, in_bit_depth);
7508         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
7509       }
7510       {
7511         double y, u, v, frame_all;
7512         frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
7513                                 bit_depth, in_bit_depth);
7514         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
7515       }
7516     }
7517   }
7518
7519 #endif
7520
7521   if (is_one_pass_cbr_svc(cpi)) {
7522     if (cm->show_frame) {
7523       ++cpi->svc.spatial_layer_to_encode;
7524       if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
7525         cpi->svc.spatial_layer_to_encode = 0;
7526     }
7527   }
7528
7529   vpx_clear_system_state();
7530   return 0;
7531 }
7532
7533 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
7534                               vp9_ppflags_t *flags) {
7535   VP9_COMMON *cm = &cpi->common;
7536 #if !CONFIG_VP9_POSTPROC
7537   (void)flags;
7538 #endif
7539
7540   if (!cm->show_frame) {
7541     return -1;
7542   } else {
7543     int ret;
7544 #if CONFIG_VP9_POSTPROC
7545     ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
7546 #else
7547     if (cm->frame_to_show) {
7548       *dest = *cm->frame_to_show;
7549       dest->y_width = cm->width;
7550       dest->y_height = cm->height;
7551       dest->uv_width = cm->width >> cm->subsampling_x;
7552       dest->uv_height = cm->height >> cm->subsampling_y;
7553       ret = 0;
7554     } else {
7555       ret = -1;
7556     }
7557 #endif  // !CONFIG_VP9_POSTPROC
7558     vpx_clear_system_state();
7559     return ret;
7560   }
7561 }
7562
7563 int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
7564                           VPX_SCALING vert_mode) {
7565   VP9_COMMON *cm = &cpi->common;
7566   int hr = 0, hs = 0, vr = 0, vs = 0;
7567
7568   if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
7569
7570   Scale2Ratio(horiz_mode, &hr, &hs);
7571   Scale2Ratio(vert_mode, &vr, &vs);
7572
7573   // always go to the next whole number
7574   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
7575   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
7576   if (cm->current_video_frame) {
7577     assert(cm->width <= cpi->initial_width);
7578     assert(cm->height <= cpi->initial_height);
7579   }
7580
7581   update_frame_size(cpi);
7582
7583   return 0;
7584 }
7585
7586 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
7587                          unsigned int height) {
7588   VP9_COMMON *cm = &cpi->common;
7589 #if CONFIG_VP9_HIGHBITDEPTH
7590   update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
7591 #else
7592   update_initial_width(cpi, 0, 1, 1);
7593 #endif  // CONFIG_VP9_HIGHBITDEPTH
7594
7595 #if CONFIG_VP9_TEMPORAL_DENOISING
7596   setup_denoiser_buffer(cpi);
7597 #endif
7598   alloc_raw_frame_buffers(cpi);
7599   if (width) {
7600     cm->width = width;
7601     if (cm->width > cpi->initial_width) {
7602       cm->width = cpi->initial_width;
7603       printf("Warning: Desired width too large, changed to %d\n", cm->width);
7604     }
7605   }
7606
7607   if (height) {
7608     cm->height = height;
7609     if (cm->height > cpi->initial_height) {
7610       cm->height = cpi->initial_height;
7611       printf("Warning: Desired height too large, changed to %d\n", cm->height);
7612     }
7613   }
7614   assert(cm->width <= cpi->initial_width);
7615   assert(cm->height <= cpi->initial_height);
7616
7617   update_frame_size(cpi);
7618
7619   return 0;
7620 }
7621
7622 void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
7623   cpi->use_svc = use_svc;
7624   return;
7625 }
7626
7627 int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; }
7628
7629 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
7630   if (flags &
7631       (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) {
7632     int ref = 7;
7633
7634     if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG;
7635
7636     if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG;
7637
7638     if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG;
7639
7640     vp9_use_as_reference(cpi, ref);
7641   }
7642
7643   if (flags &
7644       (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
7645        VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) {
7646     int upd = 7;
7647
7648     if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG;
7649
7650     if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
7651
7652     if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG;
7653
7654     vp9_update_reference(cpi, upd);
7655   }
7656
7657   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
7658     vp9_update_entropy(cpi, 0);
7659   }
7660 }
7661
7662 void vp9_set_row_mt(VP9_COMP *cpi) {
7663   // Enable row based multi-threading for supported modes of encoding
7664   cpi->row_mt = 0;
7665   if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
7666        cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
7667       cpi->oxcf.row_mt && !cpi->use_svc)
7668     cpi->row_mt = 1;
7669
7670   if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
7671       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
7672       !cpi->use_svc)
7673     cpi->row_mt = 1;
7674
7675   // In realtime mode, enable row based multi-threading for all the speed levels
7676   // where non-rd path is used.
7677   if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) {
7678     cpi->row_mt = 1;
7679   }
7680
7681   if (cpi->row_mt)
7682     cpi->row_mt_bit_exact = 1;
7683   else
7684     cpi->row_mt_bit_exact = 0;
7685 }